You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by me...@apache.org on 2018/03/22 07:58:58 UTC
[beam-site] 03/06: Add python reference docs for 2.4.0.
This is an automated email from the ASF dual-hosted git repository.
mergebot-role pushed a commit to branch mergebot
in repository https://gitbox.apache.org/repos/asf/beam-site.git
commit bfa7d50c72547df1c8fa65d0b809828c839e2b78
Author: Robert Bradshaw <ro...@google.com>
AuthorDate: Wed Mar 7 00:27:39 2018 -0800
Add python reference docs for 2.4.0.
---
src/documentation/sdks/pydoc/2.4.0/.buildinfo | 4 +
.../sdks/pydoc/2.4.0/_modules/abc.html | 419 +
.../2.4.0/_modules/apache_beam/coders/coders.html | 1216 +++
.../_modules/apache_beam/coders/observable.html | 277 +
.../_modules/apache_beam/coders/slow_stream.html | 408 +
.../pydoc/2.4.0/_modules/apache_beam/error.html | 280 +
.../_modules/apache_beam/internal/gcp/auth.html | 365 +
.../apache_beam/internal/gcp/json_value.html | 397 +
.../_modules/apache_beam/internal/pickler.html | 481 +
.../2.4.0/_modules/apache_beam/internal/util.html | 372 +
.../2.4.0/_modules/apache_beam/io/avroio.html | 701 ++
.../_modules/apache_beam/io/concat_source.html | 503 +
.../_modules/apache_beam/io/filebasedsink.html | 537 +
.../_modules/apache_beam/io/filebasedsource.html | 649 ++
.../2.4.0/_modules/apache_beam/io/filesystem.html | 824 ++
.../_modules/apache_beam/io/filesystemio.html | 501 +
.../2.4.0/_modules/apache_beam/io/filesystems.html | 493 +
.../_modules/apache_beam/io/gcp/bigquery.html | 1682 ++++
.../io/gcp/datastore/v1/adaptive_throttler.html | 328 +
.../io/gcp/datastore/v1/datastoreio.html | 745 ++
.../io/gcp/datastore/v1/fake_datastore.html | 340 +
.../apache_beam/io/gcp/datastore/v1/helper.html | 549 +
.../io/gcp/datastore/v1/query_splitter.html | 509 +
.../apache_beam/io/gcp/datastore/v1/util.html | 329 +
.../_modules/apache_beam/io/gcp/gcsfilesystem.html | 531 +
.../2.4.0/_modules/apache_beam/io/gcp/gcsio.html | 809 ++
.../2.4.0/_modules/apache_beam/io/gcp/pubsub.html | 444 +
.../_modules/apache_beam/io/hadoopfilesystem.html | 595 ++
.../2.4.0/_modules/apache_beam/io/iobase.html | 1328 +++
.../_modules/apache_beam/io/localfilesystem.html | 501 +
.../_modules/apache_beam/io/range_trackers.html | 645 ++
.../apache_beam/io/restriction_trackers.html | 365 +
.../_modules/apache_beam/io/source_test_utils.html | 896 ++
.../2.4.0/_modules/apache_beam/io/textio.html | 818 ++
.../2.4.0/_modules/apache_beam/io/tfrecordio.html | 558 +
.../pydoc/2.4.0/_modules/apache_beam/io/vcfio.html | 714 ++
.../2.4.0/_modules/apache_beam/metrics/cells.html | 735 ++
.../2.4.0/_modules/apache_beam/metrics/metric.html | 463 +
.../_modules/apache_beam/metrics/metricbase.html | 347 +
.../apache_beam/options/pipeline_options.html | 935 ++
.../options/pipeline_options_validator.html | 435 +
.../apache_beam/options/value_provider.html | 360 +
.../pydoc/2.4.0/_modules/apache_beam/pipeline.html | 1130 +++
.../api/beam_artifact_api_pb2_grpc.html | 367 +
.../portability/api/beam_fn_api_pb2_grpc.html | 422 +
.../portability/api/beam_job_api_pb2_grpc.html | 366 +
.../api/beam_provision_api_pb2_grpc.html | 284 +
.../pydoc/2.4.0/_modules/apache_beam/pvalue.html | 784 ++
.../runners/dataflow/dataflow_metrics.html | 442 +
.../runners/dataflow/dataflow_runner.html | 1269 +++
.../runners/dataflow/native_io/iobase.html | 559 +
.../dataflow/native_io/streaming_create.html | 306 +
.../runners/dataflow/ptransform_overrides.html | 282 +
.../runners/dataflow/test_dataflow_runner.html | 289 +
.../apache_beam/runners/direct/bundle_factory.html | 438 +
.../_modules/apache_beam/runners/direct/clock.html | 285 +
.../direct/consumer_tracking_pipeline_visitor.html | 292 +
.../apache_beam/runners/direct/direct_metrics.html | 358 +
.../apache_beam/runners/direct/direct_runner.html | 659 ++
.../runners/direct/evaluation_context.html | 561 ++
.../apache_beam/runners/direct/executor.html | 883 ++
.../runners/direct/helper_transforms.html | 339 +
.../runners/direct/sdf_direct_runner.html | 594 ++
.../runners/direct/transform_evaluator.html | 1138 +++
.../_modules/apache_beam/runners/direct/util.html | 309 +
.../runners/direct/watermark_manager.html | 504 +
.../python_rpc_direct_runner.html | 344 +
.../experimental/python_rpc_direct/server.html | 345 +
.../_modules/apache_beam/runners/job/manager.html | 286 +
.../_modules/apache_beam/runners/job/utils.html | 266 +
.../apache_beam/runners/pipeline_context.html | 346 +
.../2.4.0/_modules/apache_beam/runners/runner.html | 619 ++
.../_modules/apache_beam/runners/sdf_common.html | 401 +
.../apache_beam/testing/pipeline_verifiers.html | 387 +
.../apache_beam/testing/test_pipeline.html | 408 +
.../_modules/apache_beam/testing/test_stream.html | 413 +
.../_modules/apache_beam/testing/test_utils.html | 365 +
.../2.4.0/_modules/apache_beam/testing/util.html | 396 +
.../_modules/apache_beam/transforms/combiners.html | 838 ++
.../_modules/apache_beam/transforms/core.html | 2068 ++++
.../_modules/apache_beam/transforms/display.html | 574 ++
.../apache_beam/transforms/ptransform.html | 1057 ++
.../apache_beam/transforms/sideinputs.html | 319 +
.../_modules/apache_beam/transforms/timeutil.html | 361 +
.../_modules/apache_beam/transforms/trigger.html | 1494 +++
.../_modules/apache_beam/transforms/util.html | 771 ++
.../_modules/apache_beam/transforms/window.html | 721 ++
.../_modules/apache_beam/typehints/decorators.html | 815 ++
.../typehints/native_type_compatibility.html | 400 +
.../_modules/apache_beam/typehints/opcodes.html | 617 ++
.../apache_beam/typehints/trivial_inference.html | 680 ++
.../_modules/apache_beam/typehints/typecheck.html | 490 +
.../_modules/apache_beam/typehints/typehints.html | 1334 +++
.../_modules/apache_beam/utils/annotations.html | 345 +
.../2.4.0/_modules/apache_beam/utils/plugin.html | 276 +
.../_modules/apache_beam/utils/processes.html | 290 +
.../2.4.0/_modules/apache_beam/utils/profiler.html | 390 +
.../_modules/apache_beam/utils/proto_utils.html | 306 +
.../2.4.0/_modules/apache_beam/utils/retry.html | 447 +
.../_modules/apache_beam/utils/timestamp.html | 450 +
.../2.4.0/_modules/apache_beam/utils/urns.html | 344 +
.../sdks/pydoc/2.4.0/_modules/index.html | 332 +
.../_sources/apache_beam.coders.coders.rst.txt | 7 +
.../_sources/apache_beam.coders.observable.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.coders.rst.txt | 18 +
.../apache_beam.coders.slow_stream.rst.txt | 7 +
.../_sources/apache_beam.coders.typecoders.rst.txt | 7 +
.../pydoc/2.4.0/_sources/apache_beam.error.rst.txt | 7 +
.../_sources/apache_beam.internal.gcp.auth.rst.txt | 7 +
.../apache_beam.internal.gcp.json_value.rst.txt | 7 +
.../_sources/apache_beam.internal.gcp.rst.txt | 16 +
.../_sources/apache_beam.internal.pickler.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.internal.rst.txt | 23 +
.../_sources/apache_beam.internal.util.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.io.avroio.rst.txt | 7 +
.../_sources/apache_beam.io.concat_source.rst.txt | 7 +
.../_sources/apache_beam.io.filebasedsink.rst.txt | 7 +
.../apache_beam.io.filebasedsource.rst.txt | 7 +
.../_sources/apache_beam.io.filesystem.rst.txt | 7 +
.../_sources/apache_beam.io.filesystemio.rst.txt | 7 +
.../_sources/apache_beam.io.filesystems.rst.txt | 7 +
.../_sources/apache_beam.io.gcp.bigquery.rst.txt | 7 +
.../_sources/apache_beam.io.gcp.datastore.rst.txt | 15 +
....io.gcp.datastore.v1.adaptive_throttler.rst.txt | 7 +
...he_beam.io.gcp.datastore.v1.datastoreio.rst.txt | 7 +
...beam.io.gcp.datastore.v1.fake_datastore.rst.txt | 7 +
.../apache_beam.io.gcp.datastore.v1.helper.rst.txt | 7 +
...beam.io.gcp.datastore.v1.query_splitter.rst.txt | 7 +
.../apache_beam.io.gcp.datastore.v1.rst.txt | 20 +
.../apache_beam.io.gcp.datastore.v1.util.rst.txt | 7 +
.../apache_beam.io.gcp.gcsfilesystem.rst.txt | 7 +
.../_sources/apache_beam.io.gcp.gcsio.rst.txt | 7 +
.../_sources/apache_beam.io.gcp.pubsub.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.io.gcp.rst.txt | 25 +
.../apache_beam.io.hadoopfilesystem.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.io.iobase.rst.txt | 7 +
.../apache_beam.io.localfilesystem.rst.txt | 7 +
.../_sources/apache_beam.io.range_trackers.rst.txt | 7 +
.../apache_beam.io.restriction_trackers.rst.txt | 7 +
.../pydoc/2.4.0/_sources/apache_beam.io.rst.txt | 37 +
.../apache_beam.io.source_test_utils.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.io.textio.rst.txt | 7 +
.../_sources/apache_beam.io.tfrecordio.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.io.vcfio.rst.txt | 7 +
.../_sources/apache_beam.metrics.cells.rst.txt | 7 +
.../_sources/apache_beam.metrics.metric.rst.txt | 7 +
.../apache_beam.metrics.metricbase.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.metrics.rst.txt | 17 +
.../apache_beam.options.pipeline_options.rst.txt | 7 +
...beam.options.pipeline_options_validator.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.options.rst.txt | 17 +
.../apache_beam.options.value_provider.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.pipeline.rst.txt | 7 +
...tability.api.beam_artifact_api_pb2_grpc.rst.txt | 7 +
...am.portability.api.beam_fn_api_pb2_grpc.rst.txt | 7 +
...m.portability.api.beam_job_api_pb2_grpc.rst.txt | 7 +
...ability.api.beam_provision_api_pb2_grpc.rst.txt | 7 +
...ortability.api.beam_runner_api_pb2_grpc.rst.txt | 7 +
...beam.portability.api.endpoints_pb2_grpc.rst.txt | 7 +
.../_sources/apache_beam.portability.api.rst.txt | 21 +
...bility.api.standard_window_fns_pb2_grpc.rst.txt | 7 +
.../apache_beam.portability.common_urns.rst.txt | 7 +
.../apache_beam.portability.python_urns.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.portability.rst.txt | 23 +
.../2.4.0/_sources/apache_beam.pvalue.rst.txt | 7 +
..._beam.runners.dataflow.dataflow_metrics.rst.txt | 7 +
...e_beam.runners.dataflow.dataflow_runner.rst.txt | 7 +
..._beam.runners.dataflow.native_io.iobase.rst.txt | 7 +
.../apache_beam.runners.dataflow.native_io.rst.txt | 16 +
...ers.dataflow.native_io.streaming_create.rst.txt | 7 +
...m.runners.dataflow.ptransform_overrides.rst.txt | 7 +
.../_sources/apache_beam.runners.dataflow.rst.txt | 25 +
...m.runners.dataflow.test_dataflow_runner.rst.txt | 7 +
...ache_beam.runners.direct.bundle_factory.rst.txt | 7 +
.../apache_beam.runners.direct.clock.rst.txt | 7 +
...rect.consumer_tracking_pipeline_visitor.rst.txt | 7 +
...ache_beam.runners.direct.direct_metrics.rst.txt | 7 +
...pache_beam.runners.direct.direct_runner.rst.txt | 7 +
..._beam.runners.direct.evaluation_context.rst.txt | 7 +
.../apache_beam.runners.direct.executor.rst.txt | 7 +
...e_beam.runners.direct.helper_transforms.rst.txt | 7 +
.../_sources/apache_beam.runners.direct.rst.txt | 26 +
...e_beam.runners.direct.sdf_direct_runner.rst.txt | 7 +
...beam.runners.direct.transform_evaluator.rst.txt | 7 +
.../apache_beam.runners.direct.util.rst.txt | 7 +
...e_beam.runners.direct.watermark_manager.rst.txt | 7 +
...hon_rpc_direct.python_rpc_direct_runner.rst.txt | 7 +
....runners.experimental.python_rpc_direct.rst.txt | 16 +
...s.experimental.python_rpc_direct.server.rst.txt | 7 +
.../apache_beam.runners.experimental.rst.txt | 15 +
.../apache_beam.runners.job.manager.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.runners.job.rst.txt | 16 +
.../_sources/apache_beam.runners.job.utils.rst.txt | 7 +
.../apache_beam.runners.pipeline_context.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.runners.rst.txt | 27 +
.../_sources/apache_beam.runners.runner.rst.txt | 7 +
.../apache_beam.runners.sdf_common.rst.txt | 7 +
.../apache_beam.testing.pipeline_verifiers.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.testing.rst.txt | 19 +
.../apache_beam.testing.test_pipeline.rst.txt | 7 +
.../apache_beam.testing.test_stream.rst.txt | 7 +
.../apache_beam.testing.test_utils.rst.txt | 7 +
.../_sources/apache_beam.testing.util.rst.txt | 7 +
.../apache_beam.transforms.combiners.rst.txt | 7 +
.../_sources/apache_beam.transforms.core.rst.txt | 7 +
.../apache_beam.transforms.display.rst.txt | 7 +
.../apache_beam.transforms.ptransform.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.transforms.rst.txt | 23 +
.../apache_beam.transforms.sideinputs.rst.txt | 7 +
.../apache_beam.transforms.timeutil.rst.txt | 7 +
.../apache_beam.transforms.trigger.rst.txt | 7 +
.../_sources/apache_beam.transforms.util.rst.txt | 7 +
.../_sources/apache_beam.transforms.window.rst.txt | 7 +
.../apache_beam.typehints.decorators.rst.txt | 7 +
...eam.typehints.native_type_compatibility.rst.txt | 7 +
.../_sources/apache_beam.typehints.opcodes.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.typehints.rst.txt | 20 +
...apache_beam.typehints.trivial_inference.rst.txt | 7 +
.../apache_beam.typehints.typecheck.rst.txt | 7 +
.../apache_beam.typehints.typehints.rst.txt | 7 +
.../_sources/apache_beam.utils.annotations.rst.txt | 7 +
.../_sources/apache_beam.utils.plugin.rst.txt | 7 +
.../_sources/apache_beam.utils.processes.rst.txt | 7 +
.../_sources/apache_beam.utils.profiler.rst.txt | 7 +
.../_sources/apache_beam.utils.proto_utils.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.utils.retry.rst.txt | 7 +
.../pydoc/2.4.0/_sources/apache_beam.utils.rst.txt | 22 +
.../_sources/apache_beam.utils.timestamp.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.utils.urns.rst.txt | 7 +
.../2.4.0/_sources/apache_beam.version.rst.txt | 7 +
.../sdks/pydoc/2.4.0/_sources/index.rst.txt | 2 +
.../sdks/pydoc/2.4.0/_static/ajax-loader.gif | Bin 0 -> 673 bytes
.../sdks/pydoc/2.4.0/_static/basic.css | 643 ++
.../sdks/pydoc/2.4.0/_static/comment-bright.png | Bin 0 -> 756 bytes
.../sdks/pydoc/2.4.0/_static/comment-close.png | Bin 0 -> 829 bytes
.../sdks/pydoc/2.4.0/_static/comment.png | Bin 0 -> 641 bytes
.../sdks/pydoc/2.4.0/_static/css/badge_only.css | 2 +
.../sdks/pydoc/2.4.0/_static/css/theme.css | 5 +
.../sdks/pydoc/2.4.0/_static/doctools.js | 311 +
.../sdks/pydoc/2.4.0/_static/down-pressed.png | Bin 0 -> 222 bytes
.../sdks/pydoc/2.4.0/_static/down.png | Bin 0 -> 202 bytes
.../sdks/pydoc/2.4.0/_static/file.png | Bin 0 -> 286 bytes
.../pydoc/2.4.0/_static/fonts/Inconsolata-Bold.ttf | Bin 0 -> 109948 bytes
.../2.4.0/_static/fonts/Inconsolata-Regular.ttf | Bin 0 -> 96964 bytes
.../sdks/pydoc/2.4.0/_static/fonts/Lato-Bold.ttf | Bin 0 -> 656544 bytes
.../pydoc/2.4.0/_static/fonts/Lato-Regular.ttf | Bin 0 -> 656568 bytes
.../pydoc/2.4.0/_static/fonts/RobotoSlab-Bold.ttf | Bin 0 -> 170616 bytes
.../2.4.0/_static/fonts/RobotoSlab-Regular.ttf | Bin 0 -> 169064 bytes
.../2.4.0/_static/fonts/fontawesome-webfont.eot | Bin 0 -> 76518 bytes
.../2.4.0/_static/fonts/fontawesome-webfont.svg | 685 ++
.../2.4.0/_static/fonts/fontawesome-webfont.ttf | Bin 0 -> 152796 bytes
.../2.4.0/_static/fonts/fontawesome-webfont.woff | Bin 0 -> 90412 bytes
.../sdks/pydoc/2.4.0/_static/jquery-3.1.0.js | 10074 +++++++++++++++++++
.../sdks/pydoc/2.4.0/_static/jquery.js | 4 +
.../sdks/pydoc/2.4.0/_static/js/modernizr.min.js | 4 +
.../sdks/pydoc/2.4.0/_static/js/theme.js | 169 +
.../sdks/pydoc/2.4.0/_static/minus.png | Bin 0 -> 90 bytes
.../sdks/pydoc/2.4.0/_static/plus.png | Bin 0 -> 90 bytes
.../sdks/pydoc/2.4.0/_static/pygments.css | 2 +
.../sdks/pydoc/2.4.0/_static/searchtools.js | 761 ++
.../sdks/pydoc/2.4.0/_static/underscore-1.3.1.js | 999 ++
.../sdks/pydoc/2.4.0/_static/underscore.js | 31 +
.../sdks/pydoc/2.4.0/_static/up-pressed.png | Bin 0 -> 214 bytes
src/documentation/sdks/pydoc/2.4.0/_static/up.png | Bin 0 -> 203 bytes
.../sdks/pydoc/2.4.0/_static/websupport.js | 808 ++
.../pydoc/2.4.0/apache_beam.coders.coders.html | 731 ++
.../sdks/pydoc/2.4.0/apache_beam.coders.html | 266 +
.../pydoc/2.4.0/apache_beam.coders.observable.html | 279 +
.../2.4.0/apache_beam.coders.slow_stream.html | 401 +
.../pydoc/2.4.0/apache_beam.coders.typecoders.html | 306 +
.../sdks/pydoc/2.4.0/apache_beam.error.html | 296 +
.../pydoc/2.4.0/apache_beam.internal.gcp.auth.html | 293 +
.../sdks/pydoc/2.4.0/apache_beam.internal.gcp.html | 269 +
.../2.4.0/apache_beam.internal.gcp.json_value.html | 341 +
.../sdks/pydoc/2.4.0/apache_beam.internal.html | 282 +
.../pydoc/2.4.0/apache_beam.internal.pickler.html | 302 +
.../pydoc/2.4.0/apache_beam.internal.util.html | 347 +
.../sdks/pydoc/2.4.0/apache_beam.io.avroio.html | 457 +
.../pydoc/2.4.0/apache_beam.io.concat_source.html | 385 +
.../pydoc/2.4.0/apache_beam.io.filebasedsink.html | 351 +
.../2.4.0/apache_beam.io.filebasedsource.html | 402 +
.../pydoc/2.4.0/apache_beam.io.filesystem.html | 659 ++
.../pydoc/2.4.0/apache_beam.io.filesystemio.html | 537 +
.../pydoc/2.4.0/apache_beam.io.filesystems.html | 524 +
.../pydoc/2.4.0/apache_beam.io.gcp.bigquery.html | 702 ++
.../pydoc/2.4.0/apache_beam.io.gcp.datastore.html | 281 +
...eam.io.gcp.datastore.v1.adaptive_throttler.html | 334 +
...pache_beam.io.gcp.datastore.v1.datastoreio.html | 436 +
...he_beam.io.gcp.datastore.v1.fake_datastore.html | 317 +
.../apache_beam.io.gcp.datastore.v1.helper.html | 395 +
.../2.4.0/apache_beam.io.gcp.datastore.v1.html | 277 +
...he_beam.io.gcp.datastore.v1.query_splitter.html | 299 +
.../apache_beam.io.gcp.datastore.v1.util.html | 298 +
.../2.4.0/apache_beam.io.gcp.gcsfilesystem.html | 488 +
.../sdks/pydoc/2.4.0/apache_beam.io.gcp.gcsio.html | 405 +
.../sdks/pydoc/2.4.0/apache_beam.io.gcp.html | 296 +
.../pydoc/2.4.0/apache_beam.io.gcp.pubsub.html | 330 +
.../2.4.0/apache_beam.io.hadoopfilesystem.html | 401 +
.../sdks/pydoc/2.4.0/apache_beam.io.html | 333 +
.../sdks/pydoc/2.4.0/apache_beam.io.iobase.html | 1208 +++
.../2.4.0/apache_beam.io.localfilesystem.html | 488 +
.../pydoc/2.4.0/apache_beam.io.range_trackers.html | 480 +
.../2.4.0/apache_beam.io.restriction_trackers.html | 327 +
.../2.4.0/apache_beam.io.source_test_utils.html | 525 +
.../sdks/pydoc/2.4.0/apache_beam.io.textio.html | 426 +
.../pydoc/2.4.0/apache_beam.io.tfrecordio.html | 352 +
.../sdks/pydoc/2.4.0/apache_beam.io.vcfio.html | 423 +
.../pydoc/2.4.0/apache_beam.metrics.cells.html | 312 +
.../sdks/pydoc/2.4.0/apache_beam.metrics.html | 264 +
.../pydoc/2.4.0/apache_beam.metrics.metric.html | 434 +
.../2.4.0/apache_beam.metrics.metricbase.html | 366 +
.../sdks/pydoc/2.4.0/apache_beam.options.html | 264 +
.../apache_beam.options.pipeline_options.html | 639 ++
...he_beam.options.pipeline_options_validator.html | 421 +
.../2.4.0/apache_beam.options.value_provider.html | 328 +
.../sdks/pydoc/2.4.0/apache_beam.pipeline.html | 455 +
...portability.api.beam_artifact_api_pb2_grpc.html | 349 +
..._beam.portability.api.beam_fn_api_pb2_grpc.html | 405 +
...beam.portability.api.beam_job_api_pb2_grpc.html | 326 +
...ortability.api.beam_provision_api_pb2_grpc.html | 299 +
...m.portability.api.beam_runner_api_pb2_grpc.html | 261 +
...he_beam.portability.api.endpoints_pb2_grpc.html | 261 +
.../pydoc/2.4.0/apache_beam.portability.api.html | 275 +
...rtability.api.standard_window_fns_pb2_grpc.html | 261 +
.../2.4.0/apache_beam.portability.common_urns.html | 257 +
.../sdks/pydoc/2.4.0/apache_beam.portability.html | 287 +
.../2.4.0/apache_beam.portability.python_urns.html | 259 +
.../sdks/pydoc/2.4.0/apache_beam.pvalue.html | 402 +
...che_beam.runners.dataflow.dataflow_metrics.html | 297 +
...ache_beam.runners.dataflow.dataflow_runner.html | 407 +
.../pydoc/2.4.0/apache_beam.runners.dataflow.html | 292 +
.../apache_beam.runners.dataflow.native_io.html | 274 +
...che_beam.runners.dataflow.native_io.iobase.html | 505 +
...unners.dataflow.native_io.streaming_create.html | 318 +
...beam.runners.dataflow.ptransform_overrides.html | 283 +
...beam.runners.dataflow.test_dataflow_runner.html | 278 +
.../apache_beam.runners.direct.bundle_factory.html | 293 +
.../2.4.0/apache_beam.runners.direct.clock.html | 312 +
....direct.consumer_tracking_pipeline_visitor.html | 281 +
.../apache_beam.runners.direct.direct_metrics.html | 327 +
.../apache_beam.runners.direct.direct_runner.html | 302 +
...che_beam.runners.direct.evaluation_context.html | 397 +
.../2.4.0/apache_beam.runners.direct.executor.html | 303 +
...ache_beam.runners.direct.helper_transforms.html | 321 +
.../pydoc/2.4.0/apache_beam.runners.direct.html | 284 +
...ache_beam.runners.direct.sdf_direct_runner.html | 424 +
...he_beam.runners.direct.transform_evaluator.html | 332 +
.../2.4.0/apache_beam.runners.direct.util.html | 287 +
...ache_beam.runners.direct.watermark_manager.html | 314 +
.../2.4.0/apache_beam.runners.experimental.html | 277 +
...eam.runners.experimental.python_rpc_direct.html | 275 +
...python_rpc_direct.python_rpc_direct_runner.html | 280 +
...ners.experimental.python_rpc_direct.server.html | 303 +
.../sdks/pydoc/2.4.0/apache_beam.runners.html | 341 +
.../sdks/pydoc/2.4.0/apache_beam.runners.job.html | 271 +
.../2.4.0/apache_beam.runners.job.manager.html | 272 +
.../pydoc/2.4.0/apache_beam.runners.job.utils.html | 275 +
.../apache_beam.runners.pipeline_context.html | 278 +
.../pydoc/2.4.0/apache_beam.runners.runner.html | 485 +
.../2.4.0/apache_beam.runners.sdf_common.html | 371 +
.../sdks/pydoc/2.4.0/apache_beam.testing.html | 268 +
.../apache_beam.testing.pipeline_verifiers.html | 324 +
.../2.4.0/apache_beam.testing.test_pipeline.html | 351 +
.../2.4.0/apache_beam.testing.test_stream.html | 346 +
.../2.4.0/apache_beam.testing.test_utils.html | 324 +
.../sdks/pydoc/2.4.0/apache_beam.testing.util.html | 329 +
.../2.4.0/apache_beam.transforms.combiners.html | 419 +
.../pydoc/2.4.0/apache_beam.transforms.core.html | 1256 +++
.../2.4.0/apache_beam.transforms.display.html | 462 +
.../sdks/pydoc/2.4.0/apache_beam.transforms.html | 277 +
.../2.4.0/apache_beam.transforms.ptransform.html | 470 +
.../2.4.0/apache_beam.transforms.sideinputs.html | 285 +
.../2.4.0/apache_beam.transforms.timeutil.html | 291 +
.../2.4.0/apache_beam.transforms.trigger.html | 732 ++
.../pydoc/2.4.0/apache_beam.transforms.util.html | 408 +
.../pydoc/2.4.0/apache_beam.transforms.window.html | 663 ++
.../2.4.0/apache_beam.typehints.decorators.html | 489 +
.../sdks/pydoc/2.4.0/apache_beam.typehints.html | 271 +
...e_beam.typehints.native_type_compatibility.html | 304 +
.../pydoc/2.4.0/apache_beam.typehints.opcodes.html | 740 ++
.../apache_beam.typehints.trivial_inference.html | 422 +
.../2.4.0/apache_beam.typehints.typecheck.html | 371 +
.../2.4.0/apache_beam.typehints.typehints.html | 329 +
.../pydoc/2.4.0/apache_beam.utils.annotations.html | 325 +
.../sdks/pydoc/2.4.0/apache_beam.utils.html | 276 +
.../sdks/pydoc/2.4.0/apache_beam.utils.plugin.html | 284 +
.../pydoc/2.4.0/apache_beam.utils.processes.html | 284 +
.../pydoc/2.4.0/apache_beam.utils.profiler.html | 327 +
.../pydoc/2.4.0/apache_beam.utils.proto_utils.html | 295 +
.../sdks/pydoc/2.4.0/apache_beam.utils.retry.html | 386 +
.../pydoc/2.4.0/apache_beam.utils.timestamp.html | 338 +
.../sdks/pydoc/2.4.0/apache_beam.utils.urns.html | 315 +
.../sdks/pydoc/2.4.0/apache_beam.version.html | 244 +
.../sdks/pydoc/2.4.0/doctest/output.txt | 25 +
.../doctrees/apache_beam.coders.coders.doctree | Bin 0 -> 164642 bytes
.../2.4.0/doctrees/apache_beam.coders.doctree | Bin 0 -> 3822 bytes
.../doctrees/apache_beam.coders.observable.doctree | Bin 0 -> 11105 bytes
.../apache_beam.coders.slow_stream.doctree | Bin 0 -> 51861 bytes
.../doctrees/apache_beam.coders.typecoders.doctree | Bin 0 -> 10813 bytes
.../pydoc/2.4.0/doctrees/apache_beam.error.doctree | Bin 0 -> 22542 bytes
.../2.4.0/doctrees/apache_beam.internal.doctree | Bin 0 -> 4621 bytes
.../doctrees/apache_beam.internal.gcp.auth.doctree | Bin 0 -> 13086 bytes
.../doctrees/apache_beam.internal.gcp.doctree | Bin 0 -> 4231 bytes
.../apache_beam.internal.gcp.json_value.doctree | Bin 0 -> 28712 bytes
.../doctrees/apache_beam.internal.pickler.doctree | Bin 0 -> 18911 bytes
.../doctrees/apache_beam.internal.util.doctree | Bin 0 -> 25497 bytes
.../2.4.0/doctrees/apache_beam.io.avroio.doctree | Bin 0 -> 54720 bytes
.../doctrees/apache_beam.io.concat_source.doctree | Bin 0 -> 41615 bytes
.../pydoc/2.4.0/doctrees/apache_beam.io.doctree | Bin 0 -> 5157 bytes
.../doctrees/apache_beam.io.filebasedsink.doctree | Bin 0 -> 35453 bytes
.../apache_beam.io.filebasedsource.doctree | Bin 0 -> 48811 bytes
.../doctrees/apache_beam.io.filesystem.doctree | Bin 0 -> 106059 bytes
.../doctrees/apache_beam.io.filesystemio.doctree | Bin 0 -> 65558 bytes
.../doctrees/apache_beam.io.filesystems.doctree | Bin 0 -> 62916 bytes
.../doctrees/apache_beam.io.gcp.bigquery.doctree | Bin 0 -> 150841 bytes
.../doctrees/apache_beam.io.gcp.datastore.doctree | Bin 0 -> 3827 bytes
....io.gcp.datastore.v1.adaptive_throttler.doctree | Bin 0 -> 20499 bytes
...he_beam.io.gcp.datastore.v1.datastoreio.doctree | Bin 0 -> 67046 bytes
.../apache_beam.io.gcp.datastore.v1.doctree | Bin 0 -> 4156 bytes
...beam.io.gcp.datastore.v1.fake_datastore.doctree | Bin 0 -> 18659 bytes
.../apache_beam.io.gcp.datastore.v1.helper.doctree | Bin 0 -> 48207 bytes
...beam.io.gcp.datastore.v1.query_splitter.doctree | Bin 0 -> 12545 bytes
.../apache_beam.io.gcp.datastore.v1.util.doctree | Bin 0 -> 14256 bytes
.../2.4.0/doctrees/apache_beam.io.gcp.doctree | Bin 0 -> 4271 bytes
.../apache_beam.io.gcp.gcsfilesystem.doctree | Bin 0 -> 54532 bytes
.../doctrees/apache_beam.io.gcp.gcsio.doctree | Bin 0 -> 39919 bytes
.../doctrees/apache_beam.io.gcp.pubsub.doctree | Bin 0 -> 23476 bytes
.../apache_beam.io.hadoopfilesystem.doctree | Bin 0 -> 38463 bytes
.../2.4.0/doctrees/apache_beam.io.iobase.doctree | Bin 0 -> 245452 bytes
.../apache_beam.io.localfilesystem.doctree | Bin 0 -> 52583 bytes
.../doctrees/apache_beam.io.range_trackers.doctree | Bin 0 -> 78762 bytes
.../apache_beam.io.restriction_trackers.doctree | Bin 0 -> 24646 bytes
.../apache_beam.io.source_test_utils.doctree | Bin 0 -> 78825 bytes
.../2.4.0/doctrees/apache_beam.io.textio.doctree | Bin 0 -> 63106 bytes
.../doctrees/apache_beam.io.tfrecordio.doctree | Bin 0 -> 27083 bytes
.../2.4.0/doctrees/apache_beam.io.vcfio.doctree | Bin 0 -> 59967 bytes
.../doctrees/apache_beam.metrics.cells.doctree | Bin 0 -> 17638 bytes
.../2.4.0/doctrees/apache_beam.metrics.doctree | Bin 0 -> 3789 bytes
.../doctrees/apache_beam.metrics.metric.doctree | Bin 0 -> 53967 bytes
.../apache_beam.metrics.metricbase.doctree | Bin 0 -> 36945 bytes
.../2.4.0/doctrees/apache_beam.options.doctree | Bin 0 -> 3826 bytes
.../apache_beam.options.pipeline_options.doctree | Bin 0 -> 98811 bytes
...beam.options.pipeline_options_validator.doctree | Bin 0 -> 56743 bytes
.../apache_beam.options.value_provider.doctree | Bin 0 -> 29728 bytes
.../2.4.0/doctrees/apache_beam.pipeline.doctree | Bin 0 -> 65445 bytes
...tability.api.beam_artifact_api_pb2_grpc.doctree | Bin 0 -> 35375 bytes
...am.portability.api.beam_fn_api_pb2_grpc.doctree | Bin 0 -> 51764 bytes
...m.portability.api.beam_job_api_pb2_grpc.doctree | Bin 0 -> 27921 bytes
...ability.api.beam_provision_api_pb2_grpc.doctree | Bin 0 -> 17127 bytes
...ortability.api.beam_runner_api_pb2_grpc.doctree | Bin 0 -> 3416 bytes
.../doctrees/apache_beam.portability.api.doctree | Bin 0 -> 4872 bytes
...beam.portability.api.endpoints_pb2_grpc.doctree | Bin 0 -> 3346 bytes
...bility.api.standard_window_fns_pb2_grpc.doctree | Bin 0 -> 3460 bytes
.../apache_beam.portability.common_urns.doctree | Bin 0 -> 3221 bytes
.../2.4.0/doctrees/apache_beam.portability.doctree | Bin 0 -> 4673 bytes
.../apache_beam.portability.python_urns.doctree | Bin 0 -> 3844 bytes
.../2.4.0/doctrees/apache_beam.pvalue.doctree | Bin 0 -> 44732 bytes
..._beam.runners.dataflow.dataflow_metrics.doctree | Bin 0 -> 12968 bytes
...e_beam.runners.dataflow.dataflow_runner.doctree | Bin 0 -> 55167 bytes
.../doctrees/apache_beam.runners.dataflow.doctree | Bin 0 -> 5296 bytes
.../apache_beam.runners.dataflow.native_io.doctree | Bin 0 -> 4029 bytes
..._beam.runners.dataflow.native_io.iobase.doctree | Bin 0 -> 75287 bytes
...ers.dataflow.native_io.streaming_create.doctree | Bin 0 -> 25213 bytes
...m.runners.dataflow.ptransform_overrides.doctree | Bin 0 -> 11696 bytes
...m.runners.dataflow.test_dataflow_runner.doctree | Bin 0 -> 9663 bytes
...ache_beam.runners.direct.bundle_factory.doctree | Bin 0 -> 12817 bytes
.../apache_beam.runners.direct.clock.doctree | Bin 0 -> 20459 bytes
...rect.consumer_tracking_pipeline_visitor.doctree | Bin 0 -> 10796 bytes
...ache_beam.runners.direct.direct_metrics.doctree | Bin 0 -> 26746 bytes
...pache_beam.runners.direct.direct_runner.doctree | Bin 0 -> 17235 bytes
.../doctrees/apache_beam.runners.direct.doctree | Bin 0 -> 5193 bytes
..._beam.runners.direct.evaluation_context.doctree | Bin 0 -> 47388 bytes
.../apache_beam.runners.direct.executor.doctree | Bin 0 -> 19645 bytes
...e_beam.runners.direct.helper_transforms.doctree | Bin 0 -> 26799 bytes
...e_beam.runners.direct.sdf_direct_runner.doctree | Bin 0 -> 63699 bytes
...beam.runners.direct.transform_evaluator.doctree | Bin 0 -> 28951 bytes
.../apache_beam.runners.direct.util.doctree | Bin 0 -> 15623 bytes
...e_beam.runners.direct.watermark_manager.doctree | Bin 0 -> 20536 bytes
.../2.4.0/doctrees/apache_beam.runners.doctree | Bin 0 -> 5019 bytes
.../apache_beam.runners.experimental.doctree | Bin 0 -> 3892 bytes
....runners.experimental.python_rpc_direct.doctree | Bin 0 -> 4829 bytes
...hon_rpc_direct.python_rpc_direct_runner.doctree | Bin 0 -> 10784 bytes
...s.experimental.python_rpc_direct.server.doctree | Bin 0 -> 18814 bytes
.../2.4.0/doctrees/apache_beam.runners.job.doctree | Bin 0 -> 3807 bytes
.../apache_beam.runners.job.manager.doctree | Bin 0 -> 7229 bytes
.../doctrees/apache_beam.runners.job.utils.doctree | Bin 0 -> 7521 bytes
.../apache_beam.runners.pipeline_context.doctree | Bin 0 -> 11325 bytes
.../doctrees/apache_beam.runners.runner.doctree | Bin 0 -> 62070 bytes
.../apache_beam.runners.sdf_common.doctree | Bin 0 -> 50370 bytes
.../2.4.0/doctrees/apache_beam.testing.doctree | Bin 0 -> 3890 bytes
.../apache_beam.testing.pipeline_verifiers.doctree | Bin 0 -> 26219 bytes
.../apache_beam.testing.test_pipeline.doctree | Bin 0 -> 31755 bytes
.../apache_beam.testing.test_stream.doctree | Bin 0 -> 34330 bytes
.../apache_beam.testing.test_utils.doctree | Bin 0 -> 21602 bytes
.../doctrees/apache_beam.testing.util.doctree | Bin 0 -> 23466 bytes
.../apache_beam.transforms.combiners.doctree | Bin 0 -> 60757 bytes
.../doctrees/apache_beam.transforms.core.doctree | Bin 0 -> 322302 bytes
.../apache_beam.transforms.display.doctree | Bin 0 -> 62211 bytes
.../2.4.0/doctrees/apache_beam.transforms.doctree | Bin 0 -> 4430 bytes
.../apache_beam.transforms.ptransform.doctree | Bin 0 -> 63801 bytes
.../apache_beam.transforms.sideinputs.doctree | Bin 0 -> 11821 bytes
.../apache_beam.transforms.timeutil.doctree | Bin 0 -> 12110 bytes
.../apache_beam.transforms.trigger.doctree | Bin 0 -> 166062 bytes
.../doctrees/apache_beam.transforms.util.doctree | Bin 0 -> 41503 bytes
.../doctrees/apache_beam.transforms.window.doctree | Bin 0 -> 129513 bytes
.../apache_beam.typehints.decorators.doctree | Bin 0 -> 52332 bytes
.../2.4.0/doctrees/apache_beam.typehints.doctree | Bin 0 -> 4398 bytes
...eam.typehints.native_type_compatibility.doctree | Bin 0 -> 14775 bytes
.../doctrees/apache_beam.typehints.opcodes.doctree | Bin 0 -> 174037 bytes
...apache_beam.typehints.trivial_inference.doctree | Bin 0 -> 53762 bytes
.../apache_beam.typehints.typecheck.doctree | Bin 0 -> 49120 bytes
.../apache_beam.typehints.typehints.doctree | Bin 0 -> 19731 bytes
.../doctrees/apache_beam.utils.annotations.doctree | Bin 0 -> 14433 bytes
.../pydoc/2.4.0/doctrees/apache_beam.utils.doctree | Bin 0 -> 4538 bytes
.../doctrees/apache_beam.utils.plugin.doctree | Bin 0 -> 11183 bytes
.../doctrees/apache_beam.utils.processes.doctree | Bin 0 -> 11599 bytes
.../doctrees/apache_beam.utils.profiler.doctree | Bin 0 -> 20085 bytes
.../doctrees/apache_beam.utils.proto_utils.doctree | Bin 0 -> 15326 bytes
.../2.4.0/doctrees/apache_beam.utils.retry.doctree | Bin 0 -> 40777 bytes
.../doctrees/apache_beam.utils.timestamp.doctree | Bin 0 -> 24941 bytes
.../2.4.0/doctrees/apache_beam.utils.urns.doctree | Bin 0 -> 21353 bytes
.../2.4.0/doctrees/apache_beam.version.doctree | Bin 0 -> 3412 bytes
.../sdks/pydoc/2.4.0/doctrees/environment.pickle | Bin 0 -> 3300752 bytes
.../sdks/pydoc/2.4.0/doctrees/index.doctree | Bin 0 -> 20904 bytes
src/documentation/sdks/pydoc/2.4.0/genindex.html | 3744 +++++++
src/documentation/sdks/pydoc/2.4.0/index.html | 560 ++
src/documentation/sdks/pydoc/2.4.0/objects.inv | Bin 0 -> 14392 bytes
.../sdks/pydoc/2.4.0/py-modindex.html | 891 ++
src/documentation/sdks/pydoc/2.4.0/search.html | 248 +
src/documentation/sdks/pydoc/2.4.0/searchindex.js | 1 +
src/documentation/sdks/pydoc/current.md | 2 +-
531 files changed, 126550 insertions(+), 1 deletion(-)
diff --git a/src/documentation/sdks/pydoc/2.4.0/.buildinfo b/src/documentation/sdks/pydoc/2.4.0/.buildinfo
new file mode 100644
index 0000000..0885fbe
--- /dev/null
+++ b/src/documentation/sdks/pydoc/2.4.0/.buildinfo
@@ -0,0 +1,4 @@
+# Sphinx build info version 1
+# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
+config:
+tags:
diff --git a/src/documentation/sdks/pydoc/2.4.0/_modules/abc.html b/src/documentation/sdks/pydoc/2.4.0/_modules/abc.html
new file mode 100644
index 0000000..61a9a73
--- /dev/null
+++ b/src/documentation/sdks/pydoc/2.4.0/_modules/abc.html
@@ -0,0 +1,419 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>abc — Apache Beam documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="index" title="Index"
+ href="../genindex.html"/>
+ <link rel="search" title="Search" href="../search.html"/>
+ <link rel="top" title="Apache Beam documentation" href="../index.html"/>
+ <link rel="up" title="Module code" href="index.html"/>
+
+
+ <script src="../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../index.html" class="icon icon-home"> Apache Beam
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../apache_beam.coders.html">apache_beam.coders package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../apache_beam.internal.html">apache_beam.internal package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../apache_beam.io.html">apache_beam.io package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../apache_beam.metrics.html">apache_beam.metrics package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../apache_beam.options.html">apache_beam.options package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../apache_beam.portability.html">apache_beam.portability package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../apache_beam.runners.html">apache_beam.runners package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../apache_beam.testing.html">apache_beam.testing package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../apache_beam.transforms.html">apache_beam.transforms package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../apache_beam.typehints.html">apache_beam.typehints package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../apache_beam.utils.html">apache_beam.utils package</a></li>
+</ul>
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="../apache_beam.error.html">apache_beam.error module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../apache_beam.version.html">apache_beam.version module</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../index.html">Apache Beam</a>
+
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+
+ <ul class="wy-breadcrumbs">
+
+ <li><a href="../index.html">Docs</a> »</li>
+
+ <li><a href="index.html">Module code</a> »</li>
+
+ <li>abc</li>
+
+
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+
+ </ul>
+
+
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for abc</h1><div class="highlight"><pre>
+<span></span><span class="c1"># Copyright 2007 Google, Inc. All Rights Reserved.</span>
+<span class="c1"># Licensed to PSF under a Contributor Agreement.</span>
+
+<span class="sd">"""Abstract Base Classes (ABCs) according to PEP 3119."""</span>
+
+<span class="kn">import</span> <span class="nn">types</span>
+
+<span class="kn">from</span> <span class="nn">_weakrefset</span> <span class="k">import</span> <span class="n">WeakSet</span>
+
+<span class="c1"># Instance of old-style class</span>
+<span class="k">class</span> <span class="nc">_C</span><span class="p">:</span> <span class="k">pass</span>
+<span class="n">_InstanceType</span> <span class="o">=</span> <span class="nb">type</span><span class="p">(</span><span class="n">_C</span><span class="p">())</span>
+
+
+<span class="k">def</span> <span class="nf">abstractmethod</span><span class="p">(</span><span class="n">funcobj</span><span class="p">):</span>
+ <span class="sd">"""A decorator indicating abstract methods.</span>
+
+<span class="sd"> Requires that the metaclass is ABCMeta or derived from it. A</span>
+<span class="sd"> class that has a metaclass derived from ABCMeta cannot be</span>
+<span class="sd"> instantiated unless all of its abstract methods are overridden.</span>
+<span class="sd"> The abstract methods can be called using any of the normal</span>
+<span class="sd"> 'super' call mechanisms.</span>
+
+<span class="sd"> Usage:</span>
+
+<span class="sd"> class C:</span>
+<span class="sd"> __metaclass__ = ABCMeta</span>
+<span class="sd"> @abstractmethod</span>
+<span class="sd"> def my_abstract_method(self, ...):</span>
+<span class="sd"> ...</span>
+<span class="sd"> """</span>
+ <span class="n">funcobj</span><span class="o">.</span><span class="n">__isabstractmethod__</span> <span class="o">=</span> <span class="kc">True</span>
+ <span class="k">return</span> <span class="n">funcobj</span>
+
+
+<span class="k">class</span> <span class="nc">abstractproperty</span><span class="p">(</span><span class="nb">property</span><span class="p">):</span>
+ <span class="sd">"""A decorator indicating abstract properties.</span>
+
+<span class="sd"> Requires that the metaclass is ABCMeta or derived from it. A</span>
+<span class="sd"> class that has a metaclass derived from ABCMeta cannot be</span>
+<span class="sd"> instantiated unless all of its abstract properties are overridden.</span>
+<span class="sd"> The abstract properties can be called using any of the normal</span>
+<span class="sd"> 'super' call mechanisms.</span>
+
+<span class="sd"> Usage:</span>
+
+<span class="sd"> class C:</span>
+<span class="sd"> __metaclass__ = ABCMeta</span>
+<span class="sd"> @abstractproperty</span>
+<span class="sd"> def my_abstract_property(self):</span>
+<span class="sd"> ...</span>
+
+<span class="sd"> This defines a read-only property; you can also define a read-write</span>
+<span class="sd"> abstract property using the 'long' form of property declaration:</span>
+
+<span class="sd"> class C:</span>
+<span class="sd"> __metaclass__ = ABCMeta</span>
+<span class="sd"> def getx(self): ...</span>
+<span class="sd"> def setx(self, value): ...</span>
+<span class="sd"> x = abstractproperty(getx, setx)</span>
+<span class="sd"> """</span>
+ <span class="n">__isabstractmethod__</span> <span class="o">=</span> <span class="kc">True</span>
+
+
+<span class="k">class</span> <span class="nc">ABCMeta</span><span class="p">(</span><span class="nb">type</span><span class="p">):</span>
+
+ <span class="sd">"""Metaclass for defining Abstract Base Classes (ABCs).</span>
+
+<span class="sd"> Use this metaclass to create an ABC. An ABC can be subclassed</span>
+<span class="sd"> directly, and then acts as a mix-in class. You can also register</span>
+<span class="sd"> unrelated concrete classes (even built-in classes) and unrelated</span>
+<span class="sd"> ABCs as 'virtual subclasses' -- these and their descendants will</span>
+<span class="sd"> be considered subclasses of the registering ABC by the built-in</span>
+<span class="sd"> issubclass() function, but the registering ABC won't show up in</span>
+<span class="sd"> their MRO (Method Resolution Order) nor will method</span>
+<span class="sd"> implementations defined by the registering ABC be callable (not</span>
+<span class="sd"> even via super()).</span>
+
+<span class="sd"> """</span>
+
+ <span class="c1"># A global counter that is incremented each time a class is</span>
+ <span class="c1"># registered as a virtual subclass of anything. It forces the</span>
+ <span class="c1"># negative cache to be cleared before its next use.</span>
+ <span class="n">_abc_invalidation_counter</span> <span class="o">=</span> <span class="mi">0</span>
+
+ <span class="k">def</span> <span class="nf">__new__</span><span class="p">(</span><span class="n">mcls</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">bases</span><span class="p">,</span> <span class="n">namespace</span><span class="p">):</span>
+ <span class="bp">cls</span> <span class="o">=</span> <span class="nb">super</span><span class="p">(</span><span class="n">ABCMeta</span><span class="p">,</span> <span class="n">mcls</span><span class="p">)</span><span class="o">.</span><span class="fm">__new__</span><span class="p">(</span><span class="n">mcls</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">bases</span><span class="p">,</span> <span class="n">namespace</span><spa [...]
+ <span class="c1"># Compute set of abstract method names</span>
+ <span class="n">abstracts</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">name</span>
+ <span class="k">for</span> <span class="n">name</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">namespace</span><span class="o">.</span><span class="n">items</span><span class="p">()</span>
+ <span class="k">if</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="s2">"__isabstractmethod__"</span><span class="p">,</span> <span class="kc">False</span><span class="p">))</span>
+ <span class="k">for</span> <span class="n">base</span> <span class="ow">in</span> <span class="n">bases</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">name</span> <span class="ow">in</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">base</span><span class="p">,</span> <span class="s2">"__abstractmethods__"</span><span class="p">,</span> <span class="nb">set</span><span class="p">()):</span>
+ <span class="n">value</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
+ <span class="k">if</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="s2">"__isabstractmethod__"</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span>
+ <span class="n">abstracts</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">__abstractmethods__</span> <span class="o">=</span> <span class="nb">frozenset</span><span class="p">(</span><span class="n">abstracts</span><span class="p">)</span>
+ <span class="c1"># Set up inheritance registry</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_registry</span> <span class="o">=</span> <span class="n">WeakSet</span><span class="p">()</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_cache</span> <span class="o">=</span> <span class="n">WeakSet</span><span class="p">()</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_negative_cache</span> <span class="o">=</span> <span class="n">WeakSet</span><span class="p">()</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_negative_cache_version</span> <span class="o">=</span> <span class="n">ABCMeta</span><span class="o">.</span><span class="n">_abc_invalidation_counter</span>
+ <span class="k">return</span> <span class="bp">cls</span>
+
+ <span class="k">def</span> <span class="nf">register</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">subclass</span><span class="p">):</span>
+ <span class="sd">"""Register a virtual subclass of an ABC."""</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">subclass</span><span class="p">,</span> <span class="p">(</span><span class="nb">type</span><span class="p">,</span> <span class="n">types</span><span class="o">.</span><span class="n">ClassType</span><span class="p">)):</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">"Can only register classes"</span><span class="p">)</span>
+ <span class="k">if</span> <span class="nb">issubclass</span><span class="p">(</span><span class="n">subclass</span><span class="p">,</span> <span class="bp">cls</span><span class="p">):</span>
+ <span class="k">return</span> <span class="c1"># Already a subclass</span>
+ <span class="c1"># Subtle: test for cycles *after* testing for "already a subclass";</span>
+ <span class="c1"># this means we allow X.register(X) and interpret it as a no-op.</span>
+ <span class="k">if</span> <span class="nb">issubclass</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">subclass</span><span class="p">):</span>
+ <span class="c1"># This would create a cycle, which is bad for the algorithm below</span>
+ <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">"Refusing to create an inheritance cycle"</span><span class="p">)</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_registry</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">subclass</span><span class="p">)</span>
+ <span class="n">ABCMeta</span><span class="o">.</span><span class="n">_abc_invalidation_counter</span> <span class="o">+=</span> <span class="mi">1</span> <span class="c1"># Invalidate negative cache</span>
+
+ <span class="k">def</span> <span class="nf">_dump_registry</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">file</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+ <span class="sd">"""Debug helper to print the ABC registry."""</span>
+ <span class="nb">print</span> <span class="o">>></span> <span class="n">file</span><span class="p">,</span> <span class="s2">"Class: </span><span class="si">%s</span><span class="s2">.</span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="bp">cls</span><span class="o">.</span><span class="vm">__module__</span><span class="p">,</span> <span class="bp">cls</span><span class="o">.</span><span class="vm [...]
+ <span class="nb">print</span> <span class="o">>></span> <span class="n">file</span><span class="p">,</span> <span class="s2">"Inv.counter: </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">ABCMeta</span><span class="o">.</span><span class="n">_abc_invalidation_counter</span>
+ <span class="k">for</span> <span class="n">name</span> <span class="ow">in</span> <span class="nb">sorted</span><span class="p">(</span><span class="bp">cls</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">keys</span><span class="p">()):</span>
+ <span class="k">if</span> <span class="n">name</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">"_abc_"</span><span class="p">):</span>
+ <span class="n">value</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">name</span><span class="p">)</span>
+ <span class="nb">print</span> <span class="o">>></span> <span class="n">file</span><span class="p">,</span> <span class="s2">"</span><span class="si">%s</span><span class="s2">: </span><span class="si">%r</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__instancecheck__</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">instance</span><span class="p">):</span>
+ <span class="sd">"""Override for isinstance(instance, cls)."""</span>
+ <span class="c1"># Inline the cache checking when it's simple.</span>
+ <span class="n">subclass</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">instance</span><span class="p">,</span> <span class="s1">'__class__'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">subclass</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">subclass</span> <span class="ow">in</span> <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_cache</span><span class="p">:</span>
+ <span class="k">return</span> <span class="kc">True</span>
+ <span class="n">subtype</span> <span class="o">=</span> <span class="nb">type</span><span class="p">(</span><span class="n">instance</span><span class="p">)</span>
+ <span class="c1"># Old-style instances</span>
+ <span class="k">if</span> <span class="n">subtype</span> <span class="ow">is</span> <span class="n">_InstanceType</span><span class="p">:</span>
+ <span class="n">subtype</span> <span class="o">=</span> <span class="n">subclass</span>
+ <span class="k">if</span> <span class="n">subtype</span> <span class="ow">is</span> <span class="n">subclass</span> <span class="ow">or</span> <span class="n">subclass</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="k">if</span> <span class="p">(</span><span class="bp">cls</span><span class="o">.</span><span class="n">_abc_negative_cache_version</span> <span class="o">==</span>
+ <span class="n">ABCMeta</span><span class="o">.</span><span class="n">_abc_invalidation_counter</span> <span class="ow">and</span>
+ <span class="n">subtype</span> <span class="ow">in</span> <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_negative_cache</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">False</span>
+ <span class="c1"># Fall back to the subclass check.</span>
+ <span class="k">return</span> <span class="bp">cls</span><span class="o">.</span><span class="fm">__subclasscheck__</span><span class="p">(</span><span class="n">subtype</span><span class="p">)</span>
+ <span class="k">return</span> <span class="p">(</span><span class="bp">cls</span><span class="o">.</span><span class="fm">__subclasscheck__</span><span class="p">(</span><span class="n">subclass</span><span class="p">)</span> <span class="ow">or</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="fm">__subclasscheck__</span><span class="p">(</span><span class="n">subtype</span><span class="p">))</span>
+
+ <span class="k">def</span> <span class="nf">__subclasscheck__</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">subclass</span><span class="p">):</span>
+ <span class="sd">"""Override for issubclass(subclass, cls)."""</span>
+ <span class="c1"># Check cache</span>
+ <span class="k">if</span> <span class="n">subclass</span> <span class="ow">in</span> <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_cache</span><span class="p">:</span>
+ <span class="k">return</span> <span class="kc">True</span>
+ <span class="c1"># Check negative cache; may have to invalidate</span>
+ <span class="k">if</span> <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_negative_cache_version</span> <span class="o"><</span> <span class="n">ABCMeta</span><span class="o">.</span><span class="n">_abc_invalidation_counter</span><span class="p">:</span>
+ <span class="c1"># Invalidate the negative cache</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_negative_cache</span> <span class="o">=</span> <span class="n">WeakSet</span><span class="p">()</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_negative_cache_version</span> <span class="o">=</span> <span class="n">ABCMeta</span><span class="o">.</span><span class="n">_abc_invalidation_counter</span>
+ <span class="k">elif</span> <span class="n">subclass</span> <span class="ow">in</span> <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_negative_cache</span><span class="p">:</span>
+ <span class="k">return</span> <span class="kc">False</span>
+ <span class="c1"># Check the subclass hook</span>
+ <span class="n">ok</span> <span class="o">=</span> <span class="bp">cls</span><span class="o">.</span><span class="n">__subclasshook__</span><span class="p">(</span><span class="n">subclass</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">ok</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">NotImplemented</span><span class="p">:</span>
+ <span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">ok</span><span class="p">,</span> <span class="nb">bool</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">ok</span><span class="p">:</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_cache</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">subclass</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_negative_cache</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">subclass</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">ok</span>
+ <span class="c1"># Check if it's a direct subclass</span>
+ <span class="k">if</span> <span class="bp">cls</span> <span class="ow">in</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">subclass</span><span class="p">,</span> <span class="s1">'__mro__'</span><span class="p">,</span> <span class="p">()):</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_cache</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">subclass</span><span class="p">)</span>
+ <span class="k">return</span> <span class="kc">True</span>
+ <span class="c1"># Check if it's a subclass of a registered class (recursive)</span>
+ <span class="k">for</span> <span class="n">rcls</span> <span class="ow">in</span> <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_registry</span><span class="p">:</span>
+ <span class="k">if</span> <span class="nb">issubclass</span><span class="p">(</span><span class="n">subclass</span><span class="p">,</span> <span class="n">rcls</span><span class="p">):</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_cache</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">subclass</span><span class="p">)</span>
+ <span class="k">return</span> <span class="kc">True</span>
+ <span class="c1"># Check if it's a subclass of a subclass (recursive)</span>
+ <span class="k">for</span> <span class="n">scls</span> <span class="ow">in</span> <span class="bp">cls</span><span class="o">.</span><span class="n">__subclasses__</span><span class="p">():</span>
+ <span class="k">if</span> <span class="nb">issubclass</span><span class="p">(</span><span class="n">subclass</span><span class="p">,</span> <span class="n">scls</span><span class="p">):</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_cache</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">subclass</span><span class="p">)</span>
+ <span class="k">return</span> <span class="kc">True</span>
+ <span class="c1"># No dice; update negative cache</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">_abc_negative_cache</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">subclass</span><span class="p">)</span>
+ <span class="k">return</span> <span class="kc">False</span>
+</pre></div>
+
+ </div>
+ <div class="articleComments">
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright .
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true,
+ SOURCELINK_SUFFIX: '.txt'
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/coders/coders.html b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/coders/coders.html
new file mode 100644
index 0000000..dedc27e
--- /dev/null
+++ b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/coders/coders.html
@@ -0,0 +1,1216 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>apache_beam.coders.coders — Apache Beam documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="index" title="Index"
+ href="../../../genindex.html"/>
+ <link rel="search" title="Search" href="../../../search.html"/>
+ <link rel="top" title="Apache Beam documentation" href="../../../index.html"/>
+ <link rel="up" title="Module code" href="../../index.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Apache Beam
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.coders.html">apache_beam.coders package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.internal.html">apache_beam.internal package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.io.html">apache_beam.io package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.metrics.html">apache_beam.metrics package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.options.html">apache_beam.options package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.portability.html">apache_beam.portability package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.runners.html">apache_beam.runners package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.testing.html">apache_beam.testing package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.transforms.html">apache_beam.transforms package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.typehints.html">apache_beam.typehints package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.utils.html">apache_beam.utils package</a></li>
+</ul>
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.error.html">apache_beam.error module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.version.html">apache_beam.version module</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Apache Beam</a>
+
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+
+ <ul class="wy-breadcrumbs">
+
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li>apache_beam.coders.coders</li>
+
+
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+
+ </ul>
+
+
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for apache_beam.coders.coders</h1><div class="highlight"><pre>
+<span></span><span class="c1">#</span>
+<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
+<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
+<span class="c1"># this work for additional information regarding copyright ownership.</span>
+<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
+<span class="c1"># (the "License"); you may not use this file except in compliance with</span>
+<span class="c1"># the License. You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="c1">#</span>
+
+<span class="sd">"""Collection of useful coders.</span>
+
+<span class="sd">Only those coders listed in __all__ are part of the public API of this module.</span>
+<span class="sd">"""</span>
+<span class="kn">from</span> <span class="nn">__future__</span> <span class="k">import</span> <span class="n">absolute_import</span>
+
+<span class="kn">import</span> <span class="nn">base64</span>
+<span class="kn">import</span> <span class="nn">cPickle</span> <span class="k">as</span> <span class="nn">pickle</span>
+
+<span class="kn">import</span> <span class="nn">google.protobuf</span>
+<span class="kn">from</span> <span class="nn">google.protobuf</span> <span class="k">import</span> <span class="n">wrappers_pb2</span>
+
+<span class="kn">from</span> <span class="nn">apache_beam.coders</span> <span class="k">import</span> <span class="n">coder_impl</span>
+<span class="kn">from</span> <span class="nn">apache_beam.portability</span> <span class="k">import</span> <span class="n">common_urns</span>
+<span class="kn">from</span> <span class="nn">apache_beam.portability</span> <span class="k">import</span> <span class="n">python_urns</span>
+<span class="kn">from</span> <span class="nn">apache_beam.portability.api</span> <span class="k">import</span> <span class="n">beam_runner_api_pb2</span>
+<span class="kn">from</span> <span class="nn">apache_beam.utils</span> <span class="k">import</span> <span class="n">proto_utils</span>
+
+<span class="c1"># pylint: disable=wrong-import-order, wrong-import-position, ungrouped-imports</span>
+<span class="k">try</span><span class="p">:</span>
+ <span class="kn">from</span> <span class="nn">.stream</span> <span class="k">import</span> <span class="n">get_varint_size</span>
+<span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
+ <span class="kn">from</span> <span class="nn">.slow_stream</span> <span class="k">import</span> <span class="n">get_varint_size</span>
+<span class="c1"># pylint: enable=wrong-import-order, wrong-import-position, ungrouped-imports</span>
+
+
+<span class="c1"># pylint: disable=wrong-import-order, wrong-import-position</span>
+<span class="c1"># Avoid dependencies on the full SDK.</span>
+<span class="k">try</span><span class="p">:</span>
+ <span class="c1"># Import dill from the pickler module to make sure our monkey-patching of dill</span>
+ <span class="c1"># occurs.</span>
+ <span class="kn">from</span> <span class="nn">apache_beam.internal.pickler</span> <span class="k">import</span> <span class="n">dill</span>
+<span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
+ <span class="c1"># We fall back to using the stock dill library in tests that don't use the</span>
+ <span class="c1"># full Python SDK.</span>
+ <span class="kn">import</span> <span class="nn">dill</span>
+
+
+<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'Coder'</span><span class="p">,</span>
+ <span class="s1">'BytesCoder'</span><span class="p">,</span> <span class="s1">'DillCoder'</span><span class="p">,</span> <span class="s1">'FastPrimitivesCoder'</span><span class="p">,</span> <span class="s1">'FloatCoder'</span><span class="p">,</span>
+ <span class="s1">'IterableCoder'</span><span class="p">,</span> <span class="s1">'PickleCoder'</span><span class="p">,</span> <span class="s1">'ProtoCoder'</span><span class="p">,</span> <span class="s1">'SingletonCoder'</span><span class="p">,</span>
+ <span class="s1">'StrUtf8Coder'</span><span class="p">,</span> <span class="s1">'TimestampCoder'</span><span class="p">,</span> <span class="s1">'TupleCoder'</span><span class="p">,</span>
+ <span class="s1">'TupleSequenceCoder'</span><span class="p">,</span> <span class="s1">'VarIntCoder'</span><span class="p">,</span> <span class="s1">'WindowedValueCoder'</span><span class="p">]</span>
+
+
+<span class="k">def</span> <span class="nf">serialize_coder</span><span class="p">(</span><span class="n">coder</span><span class="p">):</span>
+ <span class="kn">from</span> <span class="nn">apache_beam.internal</span> <span class="k">import</span> <span class="n">pickler</span>
+ <span class="k">return</span> <span class="s1">'</span><span class="si">%s</span><span class="s1">$</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">coder</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span> <span class="n">pickler</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><sp [...]
+
+
+<span class="k">def</span> <span class="nf">deserialize_coder</span><span class="p">(</span><span class="n">serialized</span><span class="p">):</span>
+ <span class="kn">from</span> <span class="nn">apache_beam.internal</span> <span class="k">import</span> <span class="n">pickler</span>
+ <span class="k">return</span> <span class="n">pickler</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">serialized</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'$'</span><span class="p">,</span> <span class="mi">1</span><span class="p">)[</span><span class="mi">1</span><span class="p">])</span>
+<span class="c1"># pylint: enable=wrong-import-order, wrong-import-position</span>
+
+
+<div class="viewcode-block" id="Coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.Coder">[docs]</a><span class="k">class</span> <span class="nc">Coder</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">"""Base class for coders."""</span>
+
+<div class="viewcode-block" id="Coder.encode"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.Coder.encode">[docs]</a> <span class="k">def</span> <span class="nf">encode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
+ <span class="sd">"""Encodes the given object into a byte string."""</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s1">'Encode not implemented: </span><span class="si">%s</span><span class="s1">.'</span> <span class="o">%</span> <span class="bp">self</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="Coder.decode"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.Coder.decode">[docs]</a> <span class="k">def</span> <span class="nf">decode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">encoded</span><span class="p">):</span>
+ <span class="sd">"""Decodes the given byte string into the corresponding object."""</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s1">'Decode not implemented: </span><span class="si">%s</span><span class="s1">.'</span> <span class="o">%</span> <span class="bp">self</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="Coder.is_deterministic"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.Coder.is_deterministic">[docs]</a> <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Whether this coder is guaranteed to encode values deterministically.</span>
+
+<span class="sd"> A deterministic coder is required for key coders in GroupByKey operations</span>
+<span class="sd"> to produce consistent results.</span>
+
+<span class="sd"> For example, note that the default coder, the PickleCoder, is not</span>
+<span class="sd"> deterministic: the ordering of pickled entries in maps may vary across</span>
+<span class="sd"> executions since there is no defined order, and such a coder is not in</span>
+<span class="sd"> general suitable for usage as a key coder in GroupByKey operations, since</span>
+<span class="sd"> each instance of the same key may be encoded differently.</span>
+
+<span class="sd"> Returns:</span>
+<span class="sd"> Whether coder is deterministic.</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="kc">False</span></div>
+
+<div class="viewcode-block" id="Coder.as_deterministic_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.Coder.as_deterministic_coder">[docs]</a> <span class="k">def</span> <span class="nf">as_deterministic_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">step_label</span><span class="p">,</span> <span class="n">error_message</span><span class="o">=</span><span class="kc">None</ [...]
+ <span class="sd">"""Returns a deterministic version of self, if possible.</span>
+
+<span class="sd"> Otherwise raises a value error.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_deterministic</span><span class="p">():</span>
+ <span class="k">return</span> <span class="bp">self</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="n">error_message</span> <span class="ow">or</span> <span class="s2">"'</span><span class="si">%s</span><span class="s2">' cannot be made deterministic."</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="Coder.estimate_size"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.Coder.estimate_size">[docs]</a> <span class="k">def</span> <span class="nf">estimate_size</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
+ <span class="sd">"""Estimates the encoded size of the given value, in bytes.</span>
+
+<span class="sd"> Dataflow estimates the encoded size of a PCollection processed in a pipeline</span>
+<span class="sd"> step by using the estimated size of a random sample of elements in that</span>
+<span class="sd"> PCollection.</span>
+
+<span class="sd"> The default implementation encodes the given value and returns its byte</span>
+<span class="sd"> size. If a coder can provide a fast estimate of the encoded size of a value</span>
+<span class="sd"> (e.g., if the encoding has a fixed size), it can provide its estimate here</span>
+<span class="sd"> to improve performance.</span>
+
+<span class="sd"> Arguments:</span>
+<span class="sd"> value: the value whose encoded size is to be estimated.</span>
+
+<span class="sd"> Returns:</span>
+<span class="sd"> The estimated encoded size of the given value.</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">value</span><span class="p">))</span></div>
+
+ <span class="c1"># ===========================================================================</span>
+ <span class="c1"># Methods below are internal SDK details that don't need to be modified for</span>
+ <span class="c1"># user-defined coders.</span>
+ <span class="c1"># ===========================================================================</span>
+
+ <span class="k">def</span> <span class="nf">_create_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Creates a CoderImpl to do the actual encoding and decoding.</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="n">coder_impl</span><span class="o">.</span><span class="n">CallbackCoderImpl</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">encode</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">decode</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">estimate_size</span><span class="p">)</span>
+
+<div class="viewcode-block" id="Coder.get_impl"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.Coder.get_impl">[docs]</a> <span class="k">def</span> <span class="nf">get_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> Returns the CoderImpl backing this Coder.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'_impl'</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_impl</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_create_impl</span><span class="p">()</span>
+ <span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_impl</span><span class="p">,</span> <span class="n">coder_impl</span><span class="o">.</span><span class="n">CoderImpl</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_impl</span></div>
+
+ <span class="k">def</span> <span class="nf">__getstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dict_without_impl</span><span class="p">()</span>
+
+ <span class="k">def</span> <span class="nf">_dict_without_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'_impl'</span><span class="p">):</span>
+ <span class="n">d</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)</span>
+ <span class="k">del</span> <span class="n">d</span><span class="p">[</span><span class="s1">'_impl'</span><span class="p">]</span>
+ <span class="k">return</span> <span class="n">d</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span>
+
+<div class="viewcode-block" id="Coder.from_type_hint"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.Coder.from_type_hint">[docs]</a> <span class="nd">@classmethod</span>
+ <span class="k">def</span> <span class="nf">from_type_hint</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">unused_typehint</span><span class="p">,</span> <span class="n">unused_registry</span><span class="p">):</span>
+ <span class="c1"># If not overridden, just construct the coder without arguments.</span>
+ <span class="k">return</span> <span class="bp">cls</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="Coder.is_kv_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.Coder.is_kv_coder">[docs]</a> <span class="k">def</span> <span class="nf">is_kv_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">False</span></div>
+
+<div class="viewcode-block" id="Coder.key_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.Coder.key_coder">[docs]</a> <span class="k">def</span> <span class="nf">key_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_kv_coder</span><span class="p">():</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s1">'key_coder: </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="bp">self</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Not a KV coder: </span><span class="si">%s</span><span class="s1">.'</span> <span class="o">%</span> <span class="bp">self</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="Coder.value_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.Coder.value_coder">[docs]</a> <span class="k">def</span> <span class="nf">value_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_kv_coder</span><span class="p">():</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s1">'value_coder: </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="bp">self</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Not a KV coder: </span><span class="si">%s</span><span class="s1">.'</span> <span class="o">%</span> <span class="bp">self</span><span class="p">)</span></div>
+
+ <span class="k">def</span> <span class="nf">_get_component_coders</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> Returns the internal component coders of this coder."""</span>
+ <span class="c1"># This is an internal detail of the Coder API and does not need to be</span>
+ <span class="c1"># refined in user-defined Coders.</span>
+ <span class="k">return</span> <span class="p">[]</span>
+
+<div class="viewcode-block" id="Coder.as_cloud_object"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.Coder.as_cloud_object">[docs]</a> <span class="k">def</span> <span class="nf">as_cloud_object</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> Returns Google Cloud Dataflow API description of this coder."""</span>
+ <span class="c1"># This is an internal detail of the Coder API and does not need to be</span>
+ <span class="c1"># refined in user-defined Coders.</span>
+
+ <span class="n">value</span> <span class="o">=</span> <span class="p">{</span>
+ <span class="c1"># We pass coders in the form "<coder_name>$<pickled_data>" to make the</span>
+ <span class="c1"># job description JSON more readable. Data before the $ is ignored by</span>
+ <span class="c1"># the worker.</span>
+ <span class="s1">'@type'</span><span class="p">:</span> <span class="n">serialize_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">),</span>
+ <span class="s1">'component_encodings'</span><span class="p">:</span> <span class="nb">list</span><span class="p">(</span>
+ <span class="n">component</span><span class="o">.</span><span class="n">as_cloud_object</span><span class="p">()</span>
+ <span class="k">for</span> <span class="n">component</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_component_coders</span><span class="p">()</span>
+ <span class="p">),</span>
+ <span class="p">}</span>
+ <span class="k">return</span> <span class="n">value</span></div>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="c1"># pylint: disable=protected-access</span>
+ <span class="k">return</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="vm">__class__</span>
+ <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dict_without_impl</span><span class="p">()</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">_dict_without_impl</span><span class="p">())</span>
+ <span class="c1"># pylint: enable=protected-access</span>
+
+ <span class="n">_known_urns</span> <span class="o">=</span> <span class="p">{}</span>
+
+<div class="viewcode-block" id="Coder.register_urn"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.Coder.register_urn">[docs]</a> <span class="nd">@classmethod</span>
+ <span class="k">def</span> <span class="nf">register_urn</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">urn</span><span class="p">,</span> <span class="n">parameter_type</span><span class="p">,</span> <span class="n">fn</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+ <span class="sd">"""Registers a urn with a constructor.</span>
+
+<span class="sd"> For example, if 'beam:fn:foo' had parameter type FooPayload, one could</span>
+<span class="sd"> write `RunnerApiFn.register_urn('beam:fn:foo', FooPayload, foo_from_proto)`</span>
+<span class="sd"> where foo_from_proto took as arguments a FooPayload and a PipelineContext.</span>
+<span class="sd"> This function can also be used as a decorator rather than passing the</span>
+<span class="sd"> callable in as the final parameter.</span>
+
+<span class="sd"> A corresponding to_runner_api_parameter method would be expected that</span>
+<span class="sd"> returns the tuple ('beam:fn:foo', FooPayload)</span>
+<span class="sd"> """</span>
+ <span class="k">def</span> <span class="nf">register</span><span class="p">(</span><span class="n">fn</span><span class="p">):</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">_known_urns</span><span class="p">[</span><span class="n">urn</span><span class="p">]</span> <span class="o">=</span> <span class="n">parameter_type</span><span class="p">,</span> <span class="n">fn</span>
+ <span class="k">return</span> <span class="nb">staticmethod</span><span class="p">(</span><span class="n">fn</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">fn</span><span class="p">:</span>
+ <span class="c1"># Used as a statement.</span>
+ <span class="n">register</span><span class="p">(</span><span class="n">fn</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="c1"># Used as a decorator.</span>
+ <span class="k">return</span> <span class="n">register</span></div>
+
+<div class="viewcode-block" id="Coder.to_runner_api"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.Coder.to_runner_api">[docs]</a> <span class="k">def</span> <span class="nf">to_runner_api</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="n">urn</span><span class="p">,</span> <span class="n">typed_param</span><span class="p">,</span> <span class="n">components</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_runner_api_parameter</span><span class="p">(</span><span class="n">context</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">beam_runner_api_pb2</span><span class="o">.</span><span class="n">Coder</span><span class="p">(</span>
+ <span class="n">spec</span><span class="o">=</span><span class="n">beam_runner_api_pb2</span><span class="o">.</span><span class="n">SdkFunctionSpec</span><span class="p">(</span>
+ <span class="n">spec</span><span class="o">=</span><span class="n">beam_runner_api_pb2</span><span class="o">.</span><span class="n">FunctionSpec</span><span class="p">(</span>
+ <span class="n">urn</span><span class="o">=</span><span class="n">urn</span><span class="p">,</span>
+ <span class="n">payload</span><span class="o">=</span><span class="n">typed_param</span><span class="o">.</span><span class="n">SerializeToString</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">typed_param</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)),</span>
+ <span class="n">component_coder_ids</span><span class="o">=</span><span class="p">[</span><span class="n">context</span><span class="o">.</span><span class="n">coders</span><span class="o">.</span><span class="n">get_id</span><span class="p">(</span><span class="n">c</span><span class="p">)</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">components</span><span class="p">])</span></div>
+
+<div class="viewcode-block" id="Coder.from_runner_api"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.Coder.from_runner_api">[docs]</a> <span class="nd">@classmethod</span>
+ <span class="k">def</span> <span class="nf">from_runner_api</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">coder_proto</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="sd">"""Converts from an SdkFunctionSpec to a Fn object.</span>
+
+<span class="sd"> Prefer registering a urn with its parameter type and constructor.</span>
+<span class="sd"> """</span>
+ <span class="n">parameter_type</span><span class="p">,</span> <span class="n">constructor</span> <span class="o">=</span> <span class="bp">cls</span><span class="o">.</span><span class="n">_known_urns</span><span class="p">[</span><span class="n">coder_proto</span><span class="o">.</span><span class="n">spec</span><span class="o">.</span><span class="n">spec</span><span class="o">.</span><span class="n">urn</span><span class="p">]</span>
+ <span class="k">return</span> <span class="n">constructor</span><span class="p">(</span>
+ <span class="n">proto_utils</span><span class="o">.</span><span class="n">parse_Bytes</span><span class="p">(</span><span class="n">coder_proto</span><span class="o">.</span><span class="n">spec</span><span class="o">.</span><span class="n">spec</span><span class="o">.</span><span class="n">payload</span><span class="p">,</span> <span class="n">parameter_type</span><span class="p">),</span>
+ <span class="p">[</span><span class="n">context</span><span class="o">.</span><span class="n">coders</span><span class="o">.</span><span class="n">get_by_id</span><span class="p">(</span><span class="n">c</span><span class="p">)</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">coder_proto</span><span class="o">.</span><span class="n">component_coder_ids</span><span class="p">],</span>
+ <span class="n">context</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="Coder.to_runner_api_parameter"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.Coder.to_runner_api_parameter">[docs]</a> <span class="k">def</span> <span class="nf">to_runner_api_parameter</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">(</span>
+ <span class="n">python_urns</span><span class="o">.</span><span class="n">PICKLED_CODER</span><span class="p">,</span>
+ <span class="n">wrappers_pb2</span><span class="o">.</span><span class="n">BytesValue</span><span class="p">(</span><span class="n">value</span><span class="o">=</span><span class="n">serialize_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">)),</span>
+ <span class="p">())</span></div>
+
+<div class="viewcode-block" id="Coder.register_structured_urn"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.Coder.register_structured_urn">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">register_structured_urn</span><span class="p">(</span><span class="n">urn</span><span class="p">,</span> <span class="bp">cls</span><span class="p">):</span>
+ <span class="sd">"""Register a coder that's completely defined by its urn and its</span>
+<span class="sd"> component(s), if any, which are passed to construct the instance.</span>
+<span class="sd"> """</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">to_runner_api_parameter</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="k">lambda</span> <span class="bp">self</span><span class="p">,</span> <span class="n">unused_context</span><span class="p">:</span> <span class="p">(</span><span class="n">urn</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_component_coders</span><span class="p">()))</span>
+
+ <span class="c1"># pylint: disable=unused-variable</span>
+ <span class="nd">@Coder</span><span class="o">.</span><span class="n">register_urn</span><span class="p">(</span><span class="n">urn</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">from_runner_api_parameter</span><span class="p">(</span><span class="n">unused_payload</span><span class="p">,</span> <span class="n">components</span><span class="p">,</span> <span class="n">unused_context</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">components</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">cls</span><span class="p">(</span><span class="o">*</span><span class="n">components</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">cls</span><span class="p">()</span></div></div>
+
+
+<span class="nd">@Coder</span><span class="o">.</span><span class="n">register_urn</span><span class="p">(</span>
+ <span class="n">python_urns</span><span class="o">.</span><span class="n">PICKLED_CODER</span><span class="p">,</span> <span class="n">google</span><span class="o">.</span><span class="n">protobuf</span><span class="o">.</span><span class="n">wrappers_pb2</span><span class="o">.</span><span class="n">BytesValue</span><span class="p">)</span>
+<span class="k">def</span> <span class="nf">_pickle_from_runner_api_parameter</span><span class="p">(</span><span class="n">payload</span><span class="p">,</span> <span class="n">components</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">deserialize_coder</span><span class="p">(</span><span class="n">payload</span><span class="o">.</span><span class="n">value</span><span class="p">)</span>
+
+
+<div class="viewcode-block" id="StrUtf8Coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.StrUtf8Coder">[docs]</a><span class="k">class</span> <span class="nc">StrUtf8Coder</span><span class="p">(</span><span class="n">Coder</span><span class="p">):</span>
+ <span class="sd">"""A coder used for reading and writing strings as UTF-8."""</span>
+
+<div class="viewcode-block" id="StrUtf8Coder.encode"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.StrUtf8Coder.encode">[docs]</a> <span class="k">def</span> <span class="nf">encode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">value</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="StrUtf8Coder.decode"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.StrUtf8Coder.decode">[docs]</a> <span class="k">def</span> <span class="nf">decode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">value</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="StrUtf8Coder.is_deterministic"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.StrUtf8Coder.is_deterministic">[docs]</a> <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">True</span></div></div>
+
+
+<span class="k">class</span> <span class="nc">ToStringCoder</span><span class="p">(</span><span class="n">Coder</span><span class="p">):</span>
+ <span class="sd">"""A default string coder used if no sink coder is specified."""</span>
+
+ <span class="k">def</span> <span class="nf">encode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="n">unicode</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">value</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">value</span>
+ <span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">decode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">_</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s1">'ToStringCoder cannot be used for decoding.'</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">True</span>
+
+
+<span class="k">class</span> <span class="nc">FastCoder</span><span class="p">(</span><span class="n">Coder</span><span class="p">):</span>
+ <span class="sd">"""Coder subclass used when a (faster) CoderImpl is supplied directly.</span>
+
+<span class="sd"> The Coder class defines _create_impl in terms of encode() and decode();</span>
+<span class="sd"> this class inverts that by defining encode() and decode() in terms of</span>
+<span class="sd"> _create_impl().</span>
+<span class="sd"> """</span>
+
+ <span class="k">def</span> <span class="nf">encode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
+ <span class="sd">"""Encodes the given object into a byte string."""</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_impl</span><span class="p">()</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">decode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">encoded</span><span class="p">):</span>
+ <span class="sd">"""Decodes the given byte string into the corresponding object."""</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_impl</span><span class="p">()</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="n">encoded</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">estimate_size</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_impl</span><span class="p">()</span><span class="o">.</span><span class="n">estimate_size</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">_create_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span>
+
+
+<div class="viewcode-block" id="BytesCoder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.BytesCoder">[docs]</a><span class="k">class</span> <span class="nc">BytesCoder</span><span class="p">(</span><span class="n">FastCoder</span><span class="p">):</span>
+ <span class="sd">"""Byte string coder."""</span>
+
+ <span class="k">def</span> <span class="nf">_create_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">coder_impl</span><span class="o">.</span><span class="n">BytesCoderImpl</span><span class="p">()</span>
+
+<div class="viewcode-block" id="BytesCoder.is_deterministic"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.BytesCoder.is_deterministic">[docs]</a> <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">True</span></div>
+
+<div class="viewcode-block" id="BytesCoder.as_cloud_object"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.BytesCoder.as_cloud_object">[docs]</a> <span class="k">def</span> <span class="nf">as_cloud_object</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">{</span>
+ <span class="s1">'@type'</span><span class="p">:</span> <span class="s1">'kind:bytes'</span><span class="p">,</span>
+ <span class="p">}</span></div>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">==</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">hash</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span></div>
+
+
+<span class="n">Coder</span><span class="o">.</span><span class="n">register_structured_urn</span><span class="p">(</span><span class="n">common_urns</span><span class="o">.</span><span class="n">BYTES_CODER</span><span class="p">,</span> <span class="n">BytesCoder</span><span class="p">)</span>
+
+
+<div class="viewcode-block" id="VarIntCoder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.VarIntCoder">[docs]</a><span class="k">class</span> <span class="nc">VarIntCoder</span><span class="p">(</span><span class="n">FastCoder</span><span class="p">):</span>
+ <span class="sd">"""Variable-length integer coder."""</span>
+
+ <span class="k">def</span> <span class="nf">_create_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">coder_impl</span><span class="o">.</span><span class="n">VarIntCoderImpl</span><span class="p">()</span>
+
+<div class="viewcode-block" id="VarIntCoder.is_deterministic"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.VarIntCoder.is_deterministic">[docs]</a> <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">True</span></div>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">==</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">hash</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span></div>
+
+
+<span class="n">Coder</span><span class="o">.</span><span class="n">register_structured_urn</span><span class="p">(</span><span class="n">common_urns</span><span class="o">.</span><span class="n">VARINT_CODER</span><span class="p">,</span> <span class="n">VarIntCoder</span><span class="p">)</span>
+
+
+<div class="viewcode-block" id="FloatCoder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.FloatCoder">[docs]</a><span class="k">class</span> <span class="nc">FloatCoder</span><span class="p">(</span><span class="n">FastCoder</span><span class="p">):</span>
+ <span class="sd">"""A coder used for floating-point values."""</span>
+
+ <span class="k">def</span> <span class="nf">_create_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">coder_impl</span><span class="o">.</span><span class="n">FloatCoderImpl</span><span class="p">()</span>
+
+<div class="viewcode-block" id="FloatCoder.is_deterministic"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.FloatCoder.is_deterministic">[docs]</a> <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">True</span></div>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">==</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">hash</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span></div>
+
+
+<div class="viewcode-block" id="TimestampCoder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.TimestampCoder">[docs]</a><span class="k">class</span> <span class="nc">TimestampCoder</span><span class="p">(</span><span class="n">FastCoder</span><span class="p">):</span>
+ <span class="sd">"""A coder used for timeutil.Timestamp values."""</span>
+
+ <span class="k">def</span> <span class="nf">_create_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">coder_impl</span><span class="o">.</span><span class="n">TimestampCoderImpl</span><span class="p">()</span>
+
+<div class="viewcode-block" id="TimestampCoder.is_deterministic"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.TimestampCoder.is_deterministic">[docs]</a> <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">True</span></div>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">==</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">hash</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span></div>
+
+
+<div class="viewcode-block" id="SingletonCoder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.SingletonCoder">[docs]</a><span class="k">class</span> <span class="nc">SingletonCoder</span><span class="p">(</span><span class="n">FastCoder</span><span class="p">):</span>
+ <span class="sd">"""A coder that always encodes exactly one value."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_value</span> <span class="o">=</span> <span class="n">value</span>
+
+ <span class="k">def</span> <span class="nf">_create_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">coder_impl</span><span class="o">.</span><span class="n">SingletonCoderImpl</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_value</span><span class="p">)</span>
+
+<div class="viewcode-block" id="SingletonCoder.is_deterministic"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.SingletonCoder.is_deterministic">[docs]</a> <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">True</span></div>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">==</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">)</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_value</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">_value</span>
+
+ <span class="k">def</span> <span class="nf">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">hash</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_value</span><span class="p">)</span></div>
+
+
+<span class="k">def</span> <span class="nf">maybe_dill_dumps</span><span class="p">(</span><span class="n">o</span><span class="p">):</span>
+ <span class="sd">"""Pickle using cPickle or the Dill pickler as a fallback."""</span>
+ <span class="c1"># We need to use the dill pickler for objects of certain custom classes,</span>
+ <span class="c1"># including, for example, ones that contain lambdas.</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">pickle</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">o</span><span class="p">,</span> <span class="n">pickle</span><span class="o">.</span><span class="n">HIGHEST_PROTOCOL</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span> <span class="c1"># pylint: disable=broad-except</span>
+ <span class="k">return</span> <span class="n">dill</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">o</span><span class="p">)</span>
+
+
+<span class="k">def</span> <span class="nf">maybe_dill_loads</span><span class="p">(</span><span class="n">o</span><span class="p">):</span>
+ <span class="sd">"""Unpickle using cPickle or the Dill pickler as a fallback."""</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">pickle</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">o</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span> <span class="c1"># pylint: disable=broad-except</span>
+ <span class="k">return</span> <span class="n">dill</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">o</span><span class="p">)</span>
+
+
+<span class="k">class</span> <span class="nc">_PickleCoderBase</span><span class="p">(</span><span class="n">FastCoder</span><span class="p">):</span>
+ <span class="sd">"""Base class for pickling coders."""</span>
+
+ <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="c1"># Note that the default coder, the PickleCoder, is not deterministic (for</span>
+ <span class="c1"># example, the ordering of picked entries in maps may vary across</span>
+ <span class="c1"># executions), and so is not in general suitable for usage as a key coder in</span>
+ <span class="c1"># GroupByKey operations.</span>
+ <span class="k">return</span> <span class="kc">False</span>
+
+ <span class="k">def</span> <span class="nf">as_cloud_object</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">is_pair_like</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
+ <span class="n">value</span> <span class="o">=</span> <span class="nb">super</span><span class="p">(</span><span class="n">_PickleCoderBase</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">as_cloud_object</span><span class="p">()</span>
+ <span class="c1"># We currently use this coder in places where we cannot infer the coder to</span>
+ <span class="c1"># use for the value type in a more granular way. In places where the</span>
+ <span class="c1"># service expects a pair, it checks for the "is_pair_like" key, in which</span>
+ <span class="c1"># case we would fail without the hack below.</span>
+ <span class="k">if</span> <span class="n">is_pair_like</span><span class="p">:</span>
+ <span class="n">value</span><span class="p">[</span><span class="s1">'is_pair_like'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">True</span>
+ <span class="n">value</span><span class="p">[</span><span class="s1">'component_encodings'</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">as_cloud_object</span><span class="p">(</span><span class="n">is_pair_like</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">as_cloud_object</span><span class="p">(</span><span class="n">is_pair_like</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
+ <span class="p">]</span>
+
+ <span class="k">return</span> <span class="n">value</span>
+
+ <span class="c1"># We allow .key_coder() and .value_coder() to be called on PickleCoder since</span>
+ <span class="c1"># we can't always infer the return values of lambdas in ParDo operations, the</span>
+ <span class="c1"># result of which may be used in a GroupBykey.</span>
+ <span class="k">def</span> <span class="nf">is_kv_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">True</span>
+
+ <span class="k">def</span> <span class="nf">key_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span>
+
+ <span class="k">def</span> <span class="nf">value_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">==</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">hash</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span>
+
+
+<div class="viewcode-block" id="PickleCoder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.PickleCoder">[docs]</a><span class="k">class</span> <span class="nc">PickleCoder</span><span class="p">(</span><span class="n">_PickleCoderBase</span><span class="p">):</span>
+ <span class="sd">"""Coder using Python's pickle functionality."""</span>
+
+ <span class="k">def</span> <span class="nf">_create_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">dumps</span> <span class="o">=</span> <span class="n">pickle</span><span class="o">.</span><span class="n">dumps</span>
+ <span class="n">HIGHEST_PROTOCOL</span> <span class="o">=</span> <span class="n">pickle</span><span class="o">.</span><span class="n">HIGHEST_PROTOCOL</span>
+ <span class="k">return</span> <span class="n">coder_impl</span><span class="o">.</span><span class="n">CallbackCoderImpl</span><span class="p">(</span>
+ <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">dumps</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">HIGHEST_PROTOCOL</span><span class="p">),</span> <span class="n">pickle</span><span class="o">.</span><span class="n">loads</span><span class="p">)</span>
+
+<div class="viewcode-block" id="PickleCoder.as_deterministic_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.PickleCoder.as_deterministic_coder">[docs]</a> <span class="k">def</span> <span class="nf">as_deterministic_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">step_label</span><span class="p">,</span> <span class="n">error_message</span><span class="o">=</span><span class [...]
+ <span class="k">return</span> <span class="n">DeterministicFastPrimitivesCoder</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">step_label</span><span class="p">)</span></div></div>
+
+
+<div class="viewcode-block" id="DillCoder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.DillCoder">[docs]</a><span class="k">class</span> <span class="nc">DillCoder</span><span class="p">(</span><span class="n">_PickleCoderBase</span><span class="p">):</span>
+ <span class="sd">"""Coder using dill's pickle functionality."""</span>
+
+ <span class="k">def</span> <span class="nf">_create_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">coder_impl</span><span class="o">.</span><span class="n">CallbackCoderImpl</span><span class="p">(</span><span class="n">maybe_dill_dumps</span><span class="p">,</span> <span class="n">maybe_dill_loads</span><span class="p">)</span></div>
+
+
+<span class="k">class</span> <span class="nc">DeterministicFastPrimitivesCoder</span><span class="p">(</span><span class="n">FastCoder</span><span class="p">):</span>
+ <span class="sd">"""Throws runtime errors when encoding non-deterministic values."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">coder</span><span class="p">,</span> <span class="n">step_label</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_underlying_coder</span> <span class="o">=</span> <span class="n">coder</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_step_label</span> <span class="o">=</span> <span class="n">step_label</span>
+
+ <span class="k">def</span> <span class="nf">_create_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">coder_impl</span><span class="o">.</span><span class="n">DeterministicFastPrimitivesCoderImpl</span><span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_underlying_coder</span><span class="o">.</span><span class="n">get_impl</span><span class="p">(),</span> <span class="bp">self</span><span class="o">.</span><span class="n">_step_label</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">True</span>
+
+ <span class="k">def</span> <span class="nf">is_kv_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">True</span>
+
+ <span class="k">def</span> <span class="nf">key_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span>
+
+ <span class="k">def</span> <span class="nf">value_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span>
+
+
+<div class="viewcode-block" id="FastPrimitivesCoder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.FastPrimitivesCoder">[docs]</a><span class="k">class</span> <span class="nc">FastPrimitivesCoder</span><span class="p">(</span><span class="n">FastCoder</span><span class="p">):</span>
+ <span class="sd">"""Encodes simple primitives (e.g. str, int) efficiently.</span>
+
+<span class="sd"> For unknown types, falls back to another coder (e.g. PickleCoder).</span>
+<span class="sd"> """</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fallback_coder</span><span class="o">=</span><span class="n">PickleCoder</span><span class="p">()):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_fallback_coder</span> <span class="o">=</span> <span class="n">fallback_coder</span>
+
+ <span class="k">def</span> <span class="nf">_create_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">coder_impl</span><span class="o">.</span><span class="n">FastPrimitivesCoderImpl</span><span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_fallback_coder</span><span class="o">.</span><span class="n">get_impl</span><span class="p">())</span>
+
+<div class="viewcode-block" id="FastPrimitivesCoder.is_deterministic"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.FastPrimitivesCoder.is_deterministic">[docs]</a> <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_fallback_coder</span><span class="o">.</span><span class="n">is_deterministic</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="FastPrimitivesCoder.as_deterministic_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.FastPrimitivesCoder.as_deterministic_coder">[docs]</a> <span class="k">def</span> <span class="nf">as_deterministic_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">step_label</span><span class="p">,</span> <span class="n">error_message</span><span class="o">=</ [...]
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_deterministic</span><span class="p">():</span>
+ <span class="k">return</span> <span class="bp">self</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">DeterministicFastPrimitivesCoder</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">step_label</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="FastPrimitivesCoder.as_cloud_object"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.FastPrimitivesCoder.as_cloud_object">[docs]</a> <span class="k">def</span> <span class="nf">as_cloud_object</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">is_pair_like</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
+ <span class="n">value</span> <span class="o">=</span> <span class="nb">super</span><span class="p">(</span><span class="n">FastCoder</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">as_cloud_object</span><span class="p">()</span>
+ <span class="c1"># We currently use this coder in places where we cannot infer the coder to</span>
+ <span class="c1"># use for the value type in a more granular way. In places where the</span>
+ <span class="c1"># service expects a pair, it checks for the "is_pair_like" key, in which</span>
+ <span class="c1"># case we would fail without the hack below.</span>
+ <span class="k">if</span> <span class="n">is_pair_like</span><span class="p">:</span>
+ <span class="n">value</span><span class="p">[</span><span class="s1">'is_pair_like'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">True</span>
+ <span class="n">value</span><span class="p">[</span><span class="s1">'component_encodings'</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">as_cloud_object</span><span class="p">(</span><span class="n">is_pair_like</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">as_cloud_object</span><span class="p">(</span><span class="n">is_pair_like</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
+ <span class="p">]</span>
+
+ <span class="k">return</span> <span class="n">value</span></div>
+
+ <span class="c1"># We allow .key_coder() and .value_coder() to be called on FastPrimitivesCoder</span>
+ <span class="c1"># since we can't always infer the return values of lambdas in ParDo</span>
+ <span class="c1"># operations, the result of which may be used in a GroupByKey.</span>
+<div class="viewcode-block" id="FastPrimitivesCoder.is_kv_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.FastPrimitivesCoder.is_kv_coder">[docs]</a> <span class="k">def</span> <span class="nf">is_kv_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">True</span></div>
+
+<div class="viewcode-block" id="FastPrimitivesCoder.key_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.FastPrimitivesCoder.key_coder">[docs]</a> <span class="k">def</span> <span class="nf">key_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span></div>
+
+<div class="viewcode-block" id="FastPrimitivesCoder.value_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.FastPrimitivesCoder.value_coder">[docs]</a> <span class="k">def</span> <span class="nf">value_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span></div>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">==</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">hash</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span></div>
+
+
+<span class="k">class</span> <span class="nc">Base64PickleCoder</span><span class="p">(</span><span class="n">Coder</span><span class="p">):</span>
+ <span class="sd">"""Coder of objects by Python pickle, then base64 encoding."""</span>
+ <span class="c1"># TODO(robertwb): Do base64 encoding where it's needed (e.g. in json) rather</span>
+ <span class="c1"># than via a special Coder.</span>
+
+ <span class="k">def</span> <span class="nf">encode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">base64</span><span class="o">.</span><span class="n">b64encode</span><span class="p">(</span><span class="n">pickle</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="n">pickle</span><span class="o">.</span><span class="n">HIGHEST_PROTOCOL</span><span class="p">))</span>
+
+ <span class="k">def</span> <span class="nf">decode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">encoded</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">pickle</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">base64</span><span class="o">.</span><span class="n">b64decode</span><span class="p">(</span><span class="n">encoded</span><span class="p">))</span>
+
+ <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="c1"># Note that the Base64PickleCoder is not deterministic. See the</span>
+ <span class="c1"># corresponding comments for PickleCoder above.</span>
+ <span class="k">return</span> <span class="kc">False</span>
+
+ <span class="c1"># We allow .key_coder() and .value_coder() to be called on Base64PickleCoder</span>
+ <span class="c1"># since we can't always infer the return values of lambdas in ParDo</span>
+ <span class="c1"># operations, the result of which may be used in a GroupByKey.</span>
+ <span class="c1">#</span>
+ <span class="c1"># TODO(ccy): this is currently only used for KV values from Create transforms.</span>
+ <span class="c1"># Investigate a way to unify this with PickleCoder.</span>
+ <span class="k">def</span> <span class="nf">is_kv_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">True</span>
+
+ <span class="k">def</span> <span class="nf">key_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span>
+
+ <span class="k">def</span> <span class="nf">value_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span>
+
+
+<div class="viewcode-block" id="ProtoCoder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.ProtoCoder">[docs]</a><span class="k">class</span> <span class="nc">ProtoCoder</span><span class="p">(</span><span class="n">FastCoder</span><span class="p">):</span>
+ <span class="sd">"""A Coder for Google Protocol Buffers.</span>
+
+<span class="sd"> It supports both Protocol Buffers syntax versions 2 and 3. However,</span>
+<span class="sd"> the runtime version of the python protobuf library must exactly match the</span>
+<span class="sd"> version of the protoc compiler that was used to generate the protobuf</span>
+<span class="sd"> messages.</span>
+
+<span class="sd"> ProtoCoder is registered in the global CoderRegistry as the default coder for</span>
+<span class="sd"> any protobuf Message object.</span>
+
+<span class="sd"> """</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">proto_message_type</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">proto_message_type</span> <span class="o">=</span> <span class="n">proto_message_type</span>
+
+ <span class="k">def</span> <span class="nf">_create_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">coder_impl</span><span class="o">.</span><span class="n">ProtoCoderImpl</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">proto_message_type</span><span class="p">)</span>
+
+<div class="viewcode-block" id="ProtoCoder.is_deterministic"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.ProtoCoder.is_deterministic">[docs]</a> <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="c1"># TODO(vikasrk): A proto message can be deterministic if it does not contain</span>
+ <span class="c1"># a Map.</span>
+ <span class="k">return</span> <span class="kc">False</span></div>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">==</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
+ <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">proto_message_type</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">proto_message_type</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">hash</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">proto_message_type</span><span class="p">)</span>
+
+<div class="viewcode-block" id="ProtoCoder.from_type_hint"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.ProtoCoder.from_type_hint">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">from_type_hint</span><span class="p">(</span><span class="n">typehint</span><span class="p">,</span> <span class="n">unused_registry</span><span class="p">):</span>
+ <span class="k">if</span> <span class="nb">issubclass</span><span class="p">(</span><span class="n">typehint</span><span class="p">,</span> <span class="n">google</span><span class="o">.</span><span class="n">protobuf</span><span class="o">.</span><span class="n">message</span><span class="o">.</span><span class="n">Message</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">ProtoCoder</span><span class="p">(</span><span class="n">typehint</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">((</span><span class="s1">'Expected a subclass of google.protobuf.message.Message'</span>
+ <span class="s1">', but got a </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">typehint</span><span class="p">))</span></div></div>
+
+
+<div class="viewcode-block" id="TupleCoder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.TupleCoder">[docs]</a><span class="k">class</span> <span class="nc">TupleCoder</span><span class="p">(</span><span class="n">FastCoder</span><span class="p">):</span>
+ <span class="sd">"""Coder of tuple objects."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">components</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_coders</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">components</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">_create_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">coder_impl</span><span class="o">.</span><span class="n">TupleCoderImpl</span><span class="p">([</span><span class="n">c</span><span class="o">.</span><span class="n">get_impl</span><span class="p">()</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_coders</span><span class="p">])</span>
+
+<div class="viewcode-block" id="TupleCoder.is_deterministic"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.TupleCoder.is_deterministic">[docs]</a> <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">all</span><span class="p">(</span><span class="n">c</span><span class="o">.</span><span class="n">is_deterministic</span><span class="p">()</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_coders</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="TupleCoder.as_deterministic_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.TupleCoder.as_deterministic_coder">[docs]</a> <span class="k">def</span> <span class="nf">as_deterministic_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">step_label</span><span class="p">,</span> <span class="n">error_message</span><span class="o">=</span><span class=" [...]
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_deterministic</span><span class="p">():</span>
+ <span class="k">return</span> <span class="bp">self</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">TupleCoder</span><span class="p">([</span><span class="n">c</span><span class="o">.</span><span class="n">as_deterministic_coder</span><span class="p">(</span><span class="n">step_label</span><span class="p">,</span> <span class="n">error_message</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_coders</span><span class="p">])</span></div>
+
+<div class="viewcode-block" id="TupleCoder.from_type_hint"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.TupleCoder.from_type_hint">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">from_type_hint</span><span class="p">(</span><span class="n">typehint</span><span class="p">,</span> <span class="n">registry</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">TupleCoder</span><span class="p">([</span><span class="n">registry</span><span class="o">.</span><span class="n">get_coder</span><span class="p">(</span><span class="n">t</span><span class="p">)</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">typehint</span><span class="o">.</span><span class="n">tuple_types</span><span class="p">])</span></div>
+
+<div class="viewcode-block" id="TupleCoder.as_cloud_object"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.TupleCoder.as_cloud_object">[docs]</a> <span class="k">def</span> <span class="nf">as_cloud_object</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_kv_coder</span><span class="p">():</span>
+ <span class="k">return</span> <span class="p">{</span>
+ <span class="s1">'@type'</span><span class="p">:</span> <span class="s1">'kind:pair'</span><span class="p">,</span>
+ <span class="s1">'is_pair_like'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span>
+ <span class="s1">'component_encodings'</span><span class="p">:</span> <span class="nb">list</span><span class="p">(</span>
+ <span class="n">component</span><span class="o">.</span><span class="n">as_cloud_object</span><span class="p">()</span>
+ <span class="k">for</span> <span class="n">component</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_component_coders</span><span class="p">()</span>
+ <span class="p">),</span>
+ <span class="p">}</span>
+
+ <span class="k">return</span> <span class="nb">super</span><span class="p">(</span><span class="n">TupleCoder</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">as_cloud_object</span><span class="p">()</span></div>
+
+ <span class="k">def</span> <span class="nf">_get_component_coders</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">coders</span><span class="p">()</span>
+
+<div class="viewcode-block" id="TupleCoder.coders"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.TupleCoder.coders">[docs]</a> <span class="k">def</span> <span class="nf">coders</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_coders</span></div>
+
+<div class="viewcode-block" id="TupleCoder.is_kv_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.TupleCoder.is_kv_coder">[docs]</a> <span class="k">def</span> <span class="nf">is_kv_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_coders</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span></div>
+
+<div class="viewcode-block" id="TupleCoder.key_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.TupleCoder.key_coder">[docs]</a> <span class="k">def</span> <span class="nf">key_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_coders</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">2</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'TupleCoder does not have exactly 2 components.'</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_coders</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span></div>
+
+<div class="viewcode-block" id="TupleCoder.value_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.TupleCoder.value_coder">[docs]</a> <span class="k">def</span> <span class="nf">value_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_coders</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">2</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'TupleCoder does not have exactly 2 components.'</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_coders</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span></div>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s1">'TupleCoder[</span><span class="si">%s</span><span class="s1">]'</span> <span class="o">%</span> <span class="s1">', '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">c</span><span class="p">)</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.< [...]
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">==</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
+ <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_coders</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">_coders</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">hash</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_coders</span><span class="p">)</span>
+
+<div class="viewcode-block" id="TupleCoder.to_runner_api_parameter"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.TupleCoder.to_runner_api_parameter">[docs]</a> <span class="k">def</span> <span class="nf">to_runner_api_parameter</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_kv_coder</span><span class="p">():</span>
+ <span class="k">return</span> <span class="n">common_urns</span><span class="o">.</span><span class="n">KV_CODER</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">coders</span><span class="p">()</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="nb">super</span><span class="p">(</span><span class="n">TupleCoder</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">to_runner_api_parameter</span><span class="p">(</span><span class="n">context</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="TupleCoder.from_runner_api_parameter"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.TupleCoder.from_runner_api_parameter">[docs]</a> <span class="nd">@Coder</span><span class="o">.</span><span class="n">register_urn</span><span class="p">(</span><span class="n">common_urns</span><span class="o">.</span><span class="n">KV_CODER</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">from_runner_api_parameter</span><span class="p">(</span><span class="n">unused_payload</span><span class="p">,</span> <span class="n">components</span><span class="p">,</span> <span class="n">unused_context</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">TupleCoder</span><span class="p">(</span><span class="n">components</span><span class="p">)</span></div></div>
+
+
+<div class="viewcode-block" id="TupleSequenceCoder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.TupleSequenceCoder">[docs]</a><span class="k">class</span> <span class="nc">TupleSequenceCoder</span><span class="p">(</span><span class="n">FastCoder</span><span class="p">):</span>
+ <span class="sd">"""Coder of homogeneous tuple objects."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">elem_coder</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span> <span class="o">=</span> <span class="n">elem_coder</span>
+
+ <span class="k">def</span> <span class="nf">_create_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">coder_impl</span><span class="o">.</span><span class="n">TupleSequenceCoderImpl</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span><span class="o">.</span><span class="n">get_impl</span><span class="p">())</span>
+
+<div class="viewcode-block" id="TupleSequenceCoder.is_deterministic"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.TupleSequenceCoder.is_deterministic">[docs]</a> <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span><span class="o">.</span><span class="n">is_deterministic</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="TupleSequenceCoder.as_deterministic_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.TupleSequenceCoder.as_deterministic_coder">[docs]</a> <span class="k">def</span> <span class="nf">as_deterministic_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">step_label</span><span class="p">,</span> <span class="n">error_message</span><span class="o">=</sp [...]
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_deterministic</span><span class="p">():</span>
+ <span class="k">return</span> <span class="bp">self</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">TupleSequenceCoder</span><span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span><span class="o">.</span><span class="n">as_deterministic_coder</span><span class="p">(</span><span class="n">step_label</span><span class="p">,</span> <span class="n">error_message</span><span class="p">))</span></div>
+
+<div class="viewcode-block" id="TupleSequenceCoder.from_type_hint"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.TupleSequenceCoder.from_type_hint">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">from_type_hint</span><span class="p">(</span><span class="n">typehint</span><span class="p">,</span> <span class="n">registry</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">TupleSequenceCoder</span><span class="p">(</span><span class="n">registry</span><span class="o">.</span><span class="n">get_coder</span><span class="p">(</span><span class="n">typehint</span><span class="o">.</span><span class="n">inner_type</span><span class="p">))</span></div>
+
+ <span class="k">def</span> <span class="nf">_get_component_coders</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span><span class="p">,)</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s1">'TupleSequenceCoder[</span><span class="si">%r</span><span class="s1">]'</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">==</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
+ <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">hash</span><span class="p">((</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span><span class="p">))</span></div>
+
+
+<div class="viewcode-block" id="IterableCoder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.IterableCoder">[docs]</a><span class="k">class</span> <span class="nc">IterableCoder</span><span class="p">(</span><span class="n">FastCoder</span><span class="p">):</span>
+ <span class="sd">"""Coder of iterables of homogeneous objects."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">elem_coder</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span> <span class="o">=</span> <span class="n">elem_coder</span>
+
+ <span class="k">def</span> <span class="nf">_create_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">coder_impl</span><span class="o">.</span><span class="n">IterableCoderImpl</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span><span class="o">.</span><span class="n">get_impl</span><span class="p">())</span>
+
+<div class="viewcode-block" id="IterableCoder.is_deterministic"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.IterableCoder.is_deterministic">[docs]</a> <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span><span class="o">.</span><span class="n">is_deterministic</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="IterableCoder.as_deterministic_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.IterableCoder.as_deterministic_coder">[docs]</a> <span class="k">def</span> <span class="nf">as_deterministic_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">step_label</span><span class="p">,</span> <span class="n">error_message</span><span class="o">=</span><span c [...]
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_deterministic</span><span class="p">():</span>
+ <span class="k">return</span> <span class="bp">self</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">IterableCoder</span><span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span><span class="o">.</span><span class="n">as_deterministic_coder</span><span class="p">(</span><span class="n">step_label</span><span class="p">,</span> <span class="n">error_message</span><span class="p">))</span></div>
+
+<div class="viewcode-block" id="IterableCoder.as_cloud_object"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.IterableCoder.as_cloud_object">[docs]</a> <span class="k">def</span> <span class="nf">as_cloud_object</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">{</span>
+ <span class="s1">'@type'</span><span class="p">:</span> <span class="s1">'kind:stream'</span><span class="p">,</span>
+ <span class="s1">'is_stream_like'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span>
+ <span class="s1">'component_encodings'</span><span class="p">:</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span><span class="o">.</span><span class="n">as_cloud_object</span><span class="p">()],</span>
+ <span class="p">}</span></div>
+
+<div class="viewcode-block" id="IterableCoder.value_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.IterableCoder.value_coder">[docs]</a> <span class="k">def</span> <span class="nf">value_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span></div>
+
+<div class="viewcode-block" id="IterableCoder.from_type_hint"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.IterableCoder.from_type_hint">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">from_type_hint</span><span class="p">(</span><span class="n">typehint</span><span class="p">,</span> <span class="n">registry</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">IterableCoder</span><span class="p">(</span><span class="n">registry</span><span class="o">.</span><span class="n">get_coder</span><span class="p">(</span><span class="n">typehint</span><span class="o">.</span><span class="n">inner_type</span><span class="p">))</span></div>
+
+ <span class="k">def</span> <span class="nf">_get_component_coders</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span><span class="p">,)</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s1">'IterableCoder[</span><span class="si">%r</span><span class="s1">]'</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">==</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
+ <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">_elem_coder</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">hash</span><span class="p">((</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">_elem_coder</span><span class="p">))</span></div>
+
+
+<span class="n">Coder</span><span class="o">.</span><span class="n">register_structured_urn</span><span class="p">(</span><span class="n">common_urns</span><span class="o">.</span><span class="n">ITERABLE_CODER</span><span class="p">,</span> <span class="n">IterableCoder</span><span class="p">)</span>
+
+
+<span class="k">class</span> <span class="nc">GlobalWindowCoder</span><span class="p">(</span><span class="n">SingletonCoder</span><span class="p">):</span>
+ <span class="sd">"""Coder for global windows."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="kn">from</span> <span class="nn">apache_beam.transforms</span> <span class="k">import</span> <span class="n">window</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">GlobalWindowCoder</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">window</span><span class="o">.</span><span class="n">GlobalWindow</span><span class="p">())</span>
+
+ <span class="k">def</span> <span class="nf">as_cloud_object</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">{</span>
+ <span class="s1">'@type'</span><span class="p">:</span> <span class="s1">'kind:global_window'</span><span class="p">,</span>
+ <span class="p">}</span>
+
+
+<span class="n">Coder</span><span class="o">.</span><span class="n">register_structured_urn</span><span class="p">(</span>
+ <span class="n">common_urns</span><span class="o">.</span><span class="n">GLOBAL_WINDOW_CODER</span><span class="p">,</span> <span class="n">GlobalWindowCoder</span><span class="p">)</span>
+
+
+<span class="k">class</span> <span class="nc">IntervalWindowCoder</span><span class="p">(</span><span class="n">FastCoder</span><span class="p">):</span>
+ <span class="sd">"""Coder for a window defined by a start timestamp and a duration."""</span>
+
+ <span class="k">def</span> <span class="nf">_create_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">coder_impl</span><span class="o">.</span><span class="n">IntervalWindowCoderImpl</span><span class="p">()</span>
+
+ <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">True</span>
+
+ <span class="k">def</span> <span class="nf">as_cloud_object</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">{</span>
+ <span class="s1">'@type'</span><span class="p">:</span> <span class="s1">'kind:interval_window'</span><span class="p">,</span>
+ <span class="p">}</span>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">==</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">hash</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span>
+
+
+<span class="n">Coder</span><span class="o">.</span><span class="n">register_structured_urn</span><span class="p">(</span>
+ <span class="n">common_urns</span><span class="o">.</span><span class="n">INTERVAL_WINDOW_CODER</span><span class="p">,</span> <span class="n">IntervalWindowCoder</span><span class="p">)</span>
+
+
+<div class="viewcode-block" id="WindowedValueCoder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.WindowedValueCoder">[docs]</a><span class="k">class</span> <span class="nc">WindowedValueCoder</span><span class="p">(</span><span class="n">FastCoder</span><span class="p">):</span>
+ <span class="sd">"""Coder for windowed values."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">wrapped_value_coder</span><span class="p">,</span> <span class="n">window_coder</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">window_coder</span><span class="p">:</span>
+ <span class="n">window_coder</span> <span class="o">=</span> <span class="n">PickleCoder</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">wrapped_value_coder</span> <span class="o">=</span> <span class="n">wrapped_value_coder</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">timestamp_coder</span> <span class="o">=</span> <span class="n">TimestampCoder</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">window_coder</span> <span class="o">=</span> <span class="n">window_coder</span>
+
+ <span class="k">def</span> <span class="nf">_create_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">coder_impl</span><span class="o">.</span><span class="n">WindowedValueCoderImpl</span><span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">wrapped_value_coder</span><span class="o">.</span><span class="n">get_impl</span><span class="p">(),</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">timestamp_coder</span><span class="o">.</span><span class="n">get_impl</span><span class="p">(),</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">window_coder</span><span class="o">.</span><span class="n">get_impl</span><span class="p">())</span>
+
+<div class="viewcode-block" id="WindowedValueCoder.is_deterministic"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.WindowedValueCoder.is_deterministic">[docs]</a> <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">all</span><span class="p">(</span><span class="n">c</span><span class="o">.</span><span class="n">is_deterministic</span><span class="p">()</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">wrapped_value_coder</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">timestamp_coder</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">window_coder</span><span class="p">])</span></div>
+
+<div class="viewcode-block" id="WindowedValueCoder.as_cloud_object"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.WindowedValueCoder.as_cloud_object">[docs]</a> <span class="k">def</span> <span class="nf">as_cloud_object</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">{</span>
+ <span class="s1">'@type'</span><span class="p">:</span> <span class="s1">'kind:windowed_value'</span><span class="p">,</span>
+ <span class="s1">'is_wrapper'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span>
+ <span class="s1">'component_encodings'</span><span class="p">:</span> <span class="p">[</span>
+ <span class="n">component</span><span class="o">.</span><span class="n">as_cloud_object</span><span class="p">()</span>
+ <span class="k">for</span> <span class="n">component</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_component_coders</span><span class="p">()],</span>
+ <span class="p">}</span></div>
+
+ <span class="k">def</span> <span class="nf">_get_component_coders</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">wrapped_value_coder</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">window_coder</span><span class="p">]</span>
+
+<div class="viewcode-block" id="WindowedValueCoder.is_kv_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.WindowedValueCoder.is_kv_coder">[docs]</a> <span class="k">def</span> <span class="nf">is_kv_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">wrapped_value_coder</span><span class="o">.</span><span class="n">is_kv_coder</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="WindowedValueCoder.key_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.WindowedValueCoder.key_coder">[docs]</a> <span class="k">def</span> <span class="nf">key_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">wrapped_value_coder</span><span class="o">.</span><span class="n">key_coder</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="WindowedValueCoder.value_coder"><a class="viewcode-back" href="../../../apache_beam.coders.coders.html#apache_beam.coders.coders.WindowedValueCoder.value_coder">[docs]</a> <span class="k">def</span> <span class="nf">value_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">wrapped_value_coder</span><span class="o">.</span><span class="n">value_coder</span><span class="p">()</span></div>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s1">'WindowedValueCoder[</span><span class="si">%s</span><span class="s1">]'</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">wrapped_value_coder</span>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">==</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
+ <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">wrapped_value_coder</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">wrapped_value_coder</span>
+ <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">timestamp_coder</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">timestamp_coder</span>
+ <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">window_coder</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">window_coder</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">hash</span><span class="p">(</span>
+ <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">wrapped_value_coder</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">timestamp_coder</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">window_coder</span><span class="p">))</span></div>
+
+
+<span class="n">Coder</span><span class="o">.</span><span class="n">register_structured_urn</span><span class="p">(</span>
+ <span class="n">common_urns</span><span class="o">.</span><span class="n">WINDOWED_VALUE_CODER</span><span class="p">,</span> <span class="n">WindowedValueCoder</span><span class="p">)</span>
+
+
+<span class="k">class</span> <span class="nc">LengthPrefixCoder</span><span class="p">(</span><span class="n">FastCoder</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> Coder which prefixes the length of the encoded object in the stream."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value_coder</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_value_coder</span> <span class="o">=</span> <span class="n">value_coder</span>
+
+ <span class="k">def</span> <span class="nf">_create_impl</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">coder_impl</span><span class="o">.</span><span class="n">LengthPrefixCoderImpl</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_value_coder</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">is_deterministic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_value_coder</span><span class="o">.</span><span class="n">is_deterministic</span><span class="p">()</span>
+
+ <span class="k">def</span> <span class="nf">estimate_size</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
+ <span class="n">value_size</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_value_coder</span><span class="o">.</span><span class="n">estimate_size</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">get_varint_size</span><span class="p">(</span><span class="n">value_size</span><span class="p">)</span> <span class="o">+</span> <span class="n">value_size</span>
+
+ <span class="k">def</span> <span class="nf">value_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_value_coder</span>
+
+ <span class="k">def</span> <span class="nf">as_cloud_object</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">{</span>
+ <span class="s1">'@type'</span><span class="p">:</span> <span class="s1">'kind:length_prefix'</span><span class="p">,</span>
+ <span class="s1">'component_encodings'</span><span class="p">:</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_value_coder</span><span class="o">.</span><span class="n">as_cloud_object</span><span class="p">()],</span>
+ <span class="p">}</span>
+
+ <span class="k">def</span> <span class="nf">_get_component_coders</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_value_coder</span><span class="p">,)</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s1">'LengthPrefixCoder[</span><span class="si">%r</span><span class="s1">]'</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">_value_coder</span>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">==</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
+ <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_value_coder</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">_value_coder</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">hash</span><span class="p">((</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">_value_coder</span><span class="p">))</span>
+
+
+<span class="n">Coder</span><span class="o">.</span><span class="n">register_structured_urn</span><span class="p">(</span>
+ <span class="n">common_urns</span><span class="o">.</span><span class="n">LENGTH_PREFIX_CODER</span><span class="p">,</span> <span class="n">LengthPrefixCoder</span><span class="p">)</span>
+</pre></div>
+
+ </div>
+ <div class="articleComments">
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright .
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true,
+ SOURCELINK_SUFFIX: '.txt'
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/coders/observable.html b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/coders/observable.html
new file mode 100644
index 0000000..197441e
--- /dev/null
+++ b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/coders/observable.html
@@ -0,0 +1,277 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>apache_beam.coders.observable — Apache Beam documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="index" title="Index"
+ href="../../../genindex.html"/>
+ <link rel="search" title="Search" href="../../../search.html"/>
+ <link rel="top" title="Apache Beam documentation" href="../../../index.html"/>
+ <link rel="up" title="Module code" href="../../index.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Apache Beam
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.coders.html">apache_beam.coders package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.internal.html">apache_beam.internal package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.io.html">apache_beam.io package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.metrics.html">apache_beam.metrics package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.options.html">apache_beam.options package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.portability.html">apache_beam.portability package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.runners.html">apache_beam.runners package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.testing.html">apache_beam.testing package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.transforms.html">apache_beam.transforms package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.typehints.html">apache_beam.typehints package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.utils.html">apache_beam.utils package</a></li>
+</ul>
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.error.html">apache_beam.error module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.version.html">apache_beam.version module</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Apache Beam</a>
+
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+
+ <ul class="wy-breadcrumbs">
+
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li>apache_beam.coders.observable</li>
+
+
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+
+ </ul>
+
+
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for apache_beam.coders.observable</h1><div class="highlight"><pre>
+<span></span><span class="c1">#</span>
+<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
+<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
+<span class="c1"># this work for additional information regarding copyright ownership.</span>
+<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
+<span class="c1"># (the "License"); you may not use this file except in compliance with</span>
+<span class="c1"># the License. You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="c1">#</span>
+
+
+<span class="sd">"""Observable base class for iterables.</span>
+
+<span class="sd">For internal use only; no backwards-compatibility guarantees.</span>
+<span class="sd">"""</span>
+
+
+<div class="viewcode-block" id="ObservableMixin"><a class="viewcode-back" href="../../../apache_beam.coders.observable.html#apache_beam.coders.observable.ObservableMixin">[docs]</a><span class="k">class</span> <span class="nc">ObservableMixin</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> An observable iterable.</span>
+
+<span class="sd"> Subclasses need to call self.notify_observers with any object yielded.</span>
+<span class="sd"> """</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">observers</span> <span class="o">=</span> <span class="p">[]</span>
+
+<div class="viewcode-block" id="ObservableMixin.register_observer"><a class="viewcode-back" href="../../../apache_beam.coders.observable.html#apache_beam.coders.observable.ObservableMixin.register_observer">[docs]</a> <span class="k">def</span> <span class="nf">register_observer</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">callback</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">observers</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">callback</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="ObservableMixin.notify_observers"><a class="viewcode-back" href="../../../apache_beam.coders.observable.html#apache_beam.coders.observable.ObservableMixin.notify_observers">[docs]</a> <span class="k">def</span> <span class="nf">notify_observers</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="c1"># self.observers is almost always empty</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">observers</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">o</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">observers</span><span class="p">:</span>
+ <span class="n">o</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div></div>
+</pre></div>
+
+ </div>
+ <div class="articleComments">
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright .
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true,
+ SOURCELINK_SUFFIX: '.txt'
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/coders/slow_stream.html b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/coders/slow_stream.html
new file mode 100644
index 0000000..b2a2e8a
--- /dev/null
+++ b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/coders/slow_stream.html
@@ -0,0 +1,408 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>apache_beam.coders.slow_stream — Apache Beam documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="index" title="Index"
+ href="../../../genindex.html"/>
+ <link rel="search" title="Search" href="../../../search.html"/>
+ <link rel="top" title="Apache Beam documentation" href="../../../index.html"/>
+ <link rel="up" title="Module code" href="../../index.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Apache Beam
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.coders.html">apache_beam.coders package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.internal.html">apache_beam.internal package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.io.html">apache_beam.io package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.metrics.html">apache_beam.metrics package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.options.html">apache_beam.options package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.portability.html">apache_beam.portability package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.runners.html">apache_beam.runners package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.testing.html">apache_beam.testing package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.transforms.html">apache_beam.transforms package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.typehints.html">apache_beam.typehints package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.utils.html">apache_beam.utils package</a></li>
+</ul>
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.error.html">apache_beam.error module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.version.html">apache_beam.version module</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Apache Beam</a>
+
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+
+ <ul class="wy-breadcrumbs">
+
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li>apache_beam.coders.slow_stream</li>
+
+
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+
+ </ul>
+
+
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for apache_beam.coders.slow_stream</h1><div class="highlight"><pre>
+<span></span><span class="c1">#</span>
+<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
+<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
+<span class="c1"># this work for additional information regarding copyright ownership.</span>
+<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
+<span class="c1"># (the "License"); you may not use this file except in compliance with</span>
+<span class="c1"># the License. You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="c1">#</span>
+
+<span class="sd">"""A pure Python implementation of stream.pyx.</span>
+
+<span class="sd">For internal use only; no backwards-compatibility guarantees.</span>
+<span class="sd">"""</span>
+
+<span class="kn">import</span> <span class="nn">struct</span>
+
+
+<div class="viewcode-block" id="OutputStream"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.OutputStream">[docs]</a><span class="k">class</span> <span class="nc">OutputStream</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> A pure Python implementation of stream.OutputStream."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">data</span> <span class="o">=</span> <span class="p">[]</span>
+
+<div class="viewcode-block" id="OutputStream.write"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.OutputStream.write">[docs]</a> <span class="k">def</span> <span class="nf">write</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">nested</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
+ <span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">nested</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">write_var_int64</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">b</span><span class="p">))</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">b</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="OutputStream.write_byte"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.OutputStream.write_byte">[docs]</a> <span class="k">def</span> <span class="nf">write_byte</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">val</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">chr</span><span class="p">(</span><span class="n">val</span><span class="p">))</span></div>
+
+<div class="viewcode-block" id="OutputStream.write_var_int64"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.OutputStream.write_var_int64">[docs]</a> <span class="k">def</span> <span class="nf">write_var_int64</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">v</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">v</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="n">v</span> <span class="o">+=</span> <span class="mi">1</span> <span class="o">&lt;&lt;</span> <span class="mi">64</span>
+ <span class="k">if</span> <span class="n">v</span> <span class="o">&lt;=</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Value too large (negative).'</span><span class="p">)</span>
+ <span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
+ <span class="n">bits</span> <span class="o">=</span> <span class="n">v</span> <span class="o">&</span> <span class="mh">0x7F</span>
+ <span class="n">v</span> <span class="o">>>=</span> <span class="mi">7</span>
+ <span class="k">if</span> <span class="n">v</span><span class="p">:</span>
+ <span class="n">bits</span> <span class="o">|=</span> <span class="mh">0x80</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">write_byte</span><span class="p">(</span><span class="n">bits</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">v</span><span class="p">:</span>
+ <span class="k">break</span></div>
+
+<div class="viewcode-block" id="OutputStream.write_bigendian_int64"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.OutputStream.write_bigendian_int64">[docs]</a> <span class="k">def</span> <span class="nf">write_bigendian_int64</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">v</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">struct</span><span class="o">.</span><span class="n">pack</span><span class="p">(</span><span class="s1">'>q'</span><span class="p">,</span> <span class="n">v</span><span class="p">))</span></div>
+
+<div class="viewcode-block" id="OutputStream.write_bigendian_uint64"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.OutputStream.write_bigendian_uint64">[docs]</a> <span class="k">def</span> <span class="nf">write_bigendian_uint64</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">v</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">struct</span><span class="o">.</span><span class="n">pack</span><span class="p">(</span><span class="s1">'>Q'</span><span class="p">,</span> <span class="n">v</span><span class="p">))</span></div>
+
+<div class="viewcode-block" id="OutputStream.write_bigendian_int32"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.OutputStream.write_bigendian_int32">[docs]</a> <span class="k">def</span> <span class="nf">write_bigendian_int32</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">v</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">struct</span><span class="o">.</span><span class="n">pack</span><span class="p">(</span><span class="s1">'>i'</span><span class="p">,</span> <span class="n">v</span><span class="p">))</span></div>
+
+<div class="viewcode-block" id="OutputStream.write_bigendian_double"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.OutputStream.write_bigendian_double">[docs]</a> <span class="k">def</span> <span class="nf">write_bigendian_double</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">v</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">struct</span><span class="o">.</span><span class="n">pack</span><span class="p">(</span><span class="s1">'>d'</span><span class="p">,</span> <span class="n">v</span><span class="p">))</span></div>
+
+<div class="viewcode-block" id="OutputStream.get"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.OutputStream.get">[docs]</a> <span class="k">def</span> <span class="nf">get</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s1">''</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="OutputStream.size"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.OutputStream.size">[docs]</a> <span class="k">def</span> <span class="nf">size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">)</span></div></div>
+
+
+<div class="viewcode-block" id="ByteCountingOutputStream"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.ByteCountingOutputStream">[docs]</a><span class="k">class</span> <span class="nc">ByteCountingOutputStream</span><span class="p">(</span><span class="n">OutputStream</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> A pure Python implementation of stream.ByteCountingOutputStream."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="c1"># Note that we don't actually use any of the data initialized by our super.</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">ByteCountingOutputStream</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">count</span> <span class="o">=</span> <span class="mi">0</span>
+
+<div class="viewcode-block" id="ByteCountingOutputStream.write"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.ByteCountingOutputStream.write">[docs]</a> <span class="k">def</span> <span class="nf">write</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">byte_array</span><span class="p">,</span> <span class="n">nested</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
+ <span class="n">blen</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">byte_array</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">nested</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">write_var_int64</span><span class="p">(</span><span class="n">blen</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">count</span> <span class="o">+=</span> <span class="n">blen</span></div>
+
+<div class="viewcode-block" id="ByteCountingOutputStream.write_byte"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.ByteCountingOutputStream.write_byte">[docs]</a> <span class="k">def</span> <span class="nf">write_byte</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">_</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">count</span> <span class="o">+=</span> <span class="mi">1</span></div>
+
+<div class="viewcode-block" id="ByteCountingOutputStream.get_count"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.ByteCountingOutputStream.get_count">[docs]</a> <span class="k">def</span> <span class="nf">get_count</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">count</span></div>
+
+<div class="viewcode-block" id="ByteCountingOutputStream.get"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.ByteCountingOutputStream.get">[docs]</a> <span class="k">def</span> <span class="nf">get</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
+
+ <span class="k">def</span> <span class="nf">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s1">'&lt;</span><span class="si">%s</span><span class="s1"> </span><span class="si">%s</span><span class="s1">&gt;'</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">count</span><span class="p">)</span></div>
+
+
+<div class="viewcode-block" id="InputStream"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.InputStream">[docs]</a><span class="k">class</span> <span class="nc">InputStream</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> A pure Python implementation of stream.InputStream."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">data</span> <span class="o">=</span> <span class="n">data</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">pos</span> <span class="o">=</span> <span class="mi">0</span>
+
+<div class="viewcode-block" id="InputStream.size"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.InputStream.size">[docs]</a> <span class="k">def</span> <span class="nf">size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">)</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos</span></div>
+
+<div class="viewcode-block" id="InputStream.read"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.InputStream.read">[docs]</a> <span class="k">def</span> <span class="nf">read</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">pos</span> <span class="o">+=</span> <span class="n">size</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">pos</span> <span class="o">-</span> <span class="n">size</span> <span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos</span><span class="p">]</span></div>
+
+<div class="viewcode-block" id="InputStream.read_all"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.InputStream.read_all">[docs]</a> <span class="k">def</span> <span class="nf">read_all</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">nested</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">read_var_int64</span><span class="p">()</span> <span class="k">if</span> <span class="n">nested</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">size</span><span class="p">())</span></div>
+
+<div class="viewcode-block" id="InputStream.read_byte"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.InputStream.read_byte">[docs]</a> <span class="k">def</span> <span class="nf">read_byte</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">pos</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="k">return</span> <span class="nb">ord</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">pos</span> <span class="o">-</span> <span class="mi">1</span><span class="p">])</span></div>
+
+<div class="viewcode-block" id="InputStream.read_var_int64"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.InputStream.read_var_int64">[docs]</a> <span class="k">def</span> <span class="nf">read_var_int64</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">shift</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="n">result</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
+ <span class="n">byte</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">read_byte</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">byte</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s1">'VarLong not terminated.'</span><span class="p">)</span>
+
+ <span class="n">bits</span> <span class="o">=</span> <span class="n">byte</span> <span class="o">&</span> <span class="mh">0x7F</span>
+ <span class="k">if</span> <span class="n">shift</span> <span class="o">>=</span> <span class="mi">64</span> <span class="ow">or</span> <span class="p">(</span><span class="n">shift</span> <span class="o">>=</span> <span class="mi">63</span> <span class="ow">and</span> <span class="n">bits</span> <span class="o">></span> <span class="mi">1</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s1">'VarLong too long.'</span><span class="p">)</span>
+ <span class="n">result</span> <span class="o">|=</span> <span class="n">bits</span> <span class="o">&lt;&lt;</span> <span class="n">shift</span>
+ <span class="n">shift</span> <span class="o">+=</span> <span class="mi">7</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">byte</span> <span class="o">&</span> <span class="mh">0x80</span><span class="p">:</span>
+ <span class="k">break</span>
+ <span class="k">if</span> <span class="n">result</span> <span class="o">>=</span> <span class="mi">1</span> <span class="o">&lt;&lt;</span> <span class="mi">63</span><span class="p">:</span>
+ <span class="n">result</span> <span class="o">-=</span> <span class="mi">1</span> <span class="o">&lt;&lt;</span> <span class="mi">64</span>
+ <span class="k">return</span> <span class="n">result</span></div>
+
+<div class="viewcode-block" id="InputStream.read_bigendian_int64"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.InputStream.read_bigendian_int64">[docs]</a> <span class="k">def</span> <span class="nf">read_bigendian_int64</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">struct</span><span class="o">.</span><span class="n">unpack</span><span class="p">(</span><span class="s1">'>q'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="mi">8</span><span class="p">))[</span><span class="mi">0</span><span class="p">]</span></div>
+
+<div class="viewcode-block" id="InputStream.read_bigendian_uint64"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.InputStream.read_bigendian_uint64">[docs]</a> <span class="k">def</span> <span class="nf">read_bigendian_uint64</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">struct</span><span class="o">.</span><span class="n">unpack</span><span class="p">(</span><span class="s1">'>Q'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="mi">8</span><span class="p">))[</span><span class="mi">0</span><span class="p">]</span></div>
+
+<div class="viewcode-block" id="InputStream.read_bigendian_int32"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.InputStream.read_bigendian_int32">[docs]</a> <span class="k">def</span> <span class="nf">read_bigendian_int32</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">struct</span><span class="o">.</span><span class="n">unpack</span><span class="p">(</span><span class="s1">'>i'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="mi">4</span><span class="p">))[</span><span class="mi">0</span><span class="p">]</span></div>
+
+<div class="viewcode-block" id="InputStream.read_bigendian_double"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.InputStream.read_bigendian_double">[docs]</a> <span class="k">def</span> <span class="nf">read_bigendian_double</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">struct</span><span class="o">.</span><span class="n">unpack</span><span class="p">(</span><span class="s1">'>d'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="mi">8</span><span class="p">))[</span><span class="mi">0</span><span class="p">]</span></div></div>
+
+
+<div class="viewcode-block" id="get_varint_size"><a class="viewcode-back" href="../../../apache_beam.coders.slow_stream.html#apache_beam.coders.slow_stream.get_varint_size">[docs]</a><span class="k">def</span> <span class="nf">get_varint_size</span><span class="p">(</span><span class="n">v</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> Returns the size of the given integer value when encoded as a VarInt."""</span>
+ <span class="k">if</span> <span class="n">v</span> <span class="o"><</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="n">v</span> <span class="o">+=</span> <span class="mi">1</span> <span class="o"><<</span> <span class="mi">64</span>
+ <span class="k">if</span> <span class="n">v</span> <span class="o"><=</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Value too large (negative).'</span><span class="p">)</span>
+ <span class="n">varint_size</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
+ <span class="n">varint_size</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">v</span> <span class="o">>>=</span> <span class="mi">7</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">v</span><span class="p">:</span>
+ <span class="k">break</span>
+ <span class="k">return</span> <span class="n">varint_size</span></div>
+</pre></div>
+
+ </div>
+ <div class="articleComments">
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright .
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true,
+ SOURCELINK_SUFFIX: '.txt'
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/error.html b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/error.html
new file mode 100644
index 0000000..26606ad
--- /dev/null
+++ b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/error.html
@@ -0,0 +1,280 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>apache_beam.error — Apache Beam documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="index" title="Index"
+ href="../../genindex.html"/>
+ <link rel="search" title="Search" href="../../search.html"/>
+ <link rel="top" title="Apache Beam documentation" href="../../index.html"/>
+ <link rel="up" title="Module code" href="../index.html"/>
+
+
+ <script src="../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../index.html" class="icon icon-home"> Apache Beam
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../apache_beam.coders.html">apache_beam.coders package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../apache_beam.internal.html">apache_beam.internal package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../apache_beam.io.html">apache_beam.io package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../apache_beam.metrics.html">apache_beam.metrics package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../apache_beam.options.html">apache_beam.options package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../apache_beam.portability.html">apache_beam.portability package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../apache_beam.runners.html">apache_beam.runners package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../apache_beam.testing.html">apache_beam.testing package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../apache_beam.transforms.html">apache_beam.transforms package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../apache_beam.typehints.html">apache_beam.typehints package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../apache_beam.utils.html">apache_beam.utils package</a></li>
+</ul>
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="../../apache_beam.error.html">apache_beam.error module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../apache_beam.version.html">apache_beam.version module</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../index.html">Apache Beam</a>
+
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+
+ <ul class="wy-breadcrumbs">
+
+ <li><a href="../../index.html">Docs</a> »</li>
+
+ <li><a href="../index.html">Module code</a> »</li>
+
+ <li>apache_beam.error</li>
+
+
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+
+ </ul>
+
+
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for apache_beam.error</h1><div class="highlight"><pre>
+<span></span><span class="c1">#</span>
+<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
+<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
+<span class="c1"># this work for additional information regarding copyright ownership.</span>
+<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
+<span class="c1"># (the "License"); you may not use this file except in compliance with</span>
+<span class="c1"># the License. You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="c1">#</span>
+
+<span class="sd">"""Python Dataflow error classes."""</span>
+
+
+<div class="viewcode-block" id="BeamError"><a class="viewcode-back" href="../../apache_beam.error.html#apache_beam.error.BeamError">[docs]</a><span class="k">class</span> <span class="nc">BeamError</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span>
+ <span class="sd">"""Base class for all Beam errors."""</span></div>
+
+
+<div class="viewcode-block" id="PipelineError"><a class="viewcode-back" href="../../apache_beam.error.html#apache_beam.error.PipelineError">[docs]</a><span class="k">class</span> <span class="nc">PipelineError</span><span class="p">(</span><span class="n">BeamError</span><span class="p">):</span>
+ <span class="sd">"""An error in the pipeline object (e.g. a PValue not linked to it)."""</span></div>
+
+
+<div class="viewcode-block" id="PValueError"><a class="viewcode-back" href="../../apache_beam.error.html#apache_beam.error.PValueError">[docs]</a><span class="k">class</span> <span class="nc">PValueError</span><span class="p">(</span><span class="n">BeamError</span><span class="p">):</span>
+ <span class="sd">"""An error related to a PValue object (e.g. value is not computed)."""</span></div>
+
+
+<div class="viewcode-block" id="RunnerError"><a class="viewcode-back" href="../../apache_beam.error.html#apache_beam.error.RunnerError">[docs]</a><span class="k">class</span> <span class="nc">RunnerError</span><span class="p">(</span><span class="n">BeamError</span><span class="p">):</span>
+ <span class="sd">"""An error related to a Runner object (e.g. cannot find a runner to run)."""</span></div>
+
+
+<div class="viewcode-block" id="RuntimeValueProviderError"><a class="viewcode-back" href="../../apache_beam.error.html#apache_beam.error.RuntimeValueProviderError">[docs]</a><span class="k">class</span> <span class="nc">RuntimeValueProviderError</span><span class="p">(</span><span class="ne">RuntimeError</span><span class="p">):</span>
+ <span class="sd">"""An error related to a ValueProvider object raised during runtime."""</span></div>
+
+
+<div class="viewcode-block" id="SideInputError"><a class="viewcode-back" href="../../apache_beam.error.html#apache_beam.error.SideInputError">[docs]</a><span class="k">class</span> <span class="nc">SideInputError</span><span class="p">(</span><span class="n">BeamError</span><span class="p">):</span>
+ <span class="sd">"""An error related to a side input to a parallel Do operation."""</span></div>
+
+
+<div class="viewcode-block" id="TransformError"><a class="viewcode-back" href="../../apache_beam.error.html#apache_beam.error.TransformError">[docs]</a><span class="k">class</span> <span class="nc">TransformError</span><span class="p">(</span><span class="n">BeamError</span><span class="p">):</span>
+ <span class="sd">"""An error related to a PTransform object."""</span></div>
+</pre></div>
+
+ </div>
+ <div class="articleComments">
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright .
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true,
+ SOURCELINK_SUFFIX: '.txt'
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/internal/gcp/auth.html b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/internal/gcp/auth.html
new file mode 100644
index 0000000..454eff5
--- /dev/null
+++ b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/internal/gcp/auth.html
@@ -0,0 +1,365 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>apache_beam.internal.gcp.auth — Apache Beam documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="index" title="Index"
+ href="../../../../genindex.html"/>
+ <link rel="search" title="Search" href="../../../../search.html"/>
+ <link rel="top" title="Apache Beam documentation" href="../../../../index.html"/>
+ <link rel="up" title="Module code" href="../../../index.html"/>
+
+
+ <script src="../../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../../index.html" class="icon icon-home"> Apache Beam
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.coders.html">apache_beam.coders package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.internal.html">apache_beam.internal package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.io.html">apache_beam.io package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.metrics.html">apache_beam.metrics package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.options.html">apache_beam.options package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.portability.html">apache_beam.portability package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.runners.html">apache_beam.runners package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.testing.html">apache_beam.testing package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.transforms.html">apache_beam.transforms package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.typehints.html">apache_beam.typehints package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.utils.html">apache_beam.utils package</a></li>
+</ul>
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.error.html">apache_beam.error module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.version.html">apache_beam.version module</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../../index.html">Apache Beam</a>
+
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+
+ <ul class="wy-breadcrumbs">
+
+ <li><a href="../../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../../index.html">Module code</a> »</li>
+
+ <li>apache_beam.internal.gcp.auth</li>
+
+
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+
+ </ul>
+
+
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for apache_beam.internal.gcp.auth</h1><div class="highlight"><pre>
+<span></span><span class="c1">#</span>
+<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
+<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
+<span class="c1"># this work for additional information regarding copyright ownership.</span>
+<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
+<span class="c1"># (the "License"); you may not use this file except in compliance with</span>
+<span class="c1"># the License. You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="c1">#</span>
+
+<span class="sd">"""Dataflow credentials and authentication."""</span>
+
+<span class="kn">import</span> <span class="nn">datetime</span>
+<span class="kn">import</span> <span class="nn">json</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">import</span> <span class="nn">os</span>
+
+<span class="kn">from</span> <span class="nn">oauth2client.client</span> <span class="k">import</span> <span class="n">GoogleCredentials</span>
+<span class="kn">from</span> <span class="nn">oauth2client.client</span> <span class="k">import</span> <span class="n">OAuth2Credentials</span>
+
+<span class="kn">from</span> <span class="nn">apache_beam.utils</span> <span class="k">import</span> <span class="n">retry</span>
+<span class="kn">from</span> <span class="nn">six.moves.urllib.request</span> <span class="k">import</span> <span class="n">Request</span>
+<span class="kn">from</span> <span class="nn">six.moves.urllib.request</span> <span class="k">import</span> <span class="n">urlopen</span>
+
+<span class="c1"># When we are running in GCE, we can authenticate with VM credentials.</span>
+<span class="n">is_running_in_gce</span> <span class="o">=</span> <span class="kc">False</span>
+
+<span class="c1"># When we are running in GCE, this value is set based on worker startup</span>
+<span class="c1"># information.</span>
+<span class="n">executing_project</span> <span class="o">=</span> <span class="kc">None</span>
+
+
+<div class="viewcode-block" id="set_running_in_gce"><a class="viewcode-back" href="../../../../apache_beam.internal.gcp.auth.html#apache_beam.internal.gcp.auth.set_running_in_gce">[docs]</a><span class="k">def</span> <span class="nf">set_running_in_gce</span><span class="p">(</span><span class="n">worker_executing_project</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> Informs the authentication library that we are running in GCE.</span>
+
+<span class="sd"> When we are running in GCE, we have the option of using the VM metadata</span>
+<span class="sd"> credentials for authentication to Google services.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> worker_executing_project: The project running the workflow. This information</span>
+<span class="sd"> comes from worker startup information.</span>
+<span class="sd"> """</span>
+ <span class="k">global</span> <span class="n">is_running_in_gce</span>
+ <span class="k">global</span> <span class="n">executing_project</span>
+ <span class="n">is_running_in_gce</span> <span class="o">=</span> <span class="kc">True</span>
+ <span class="n">executing_project</span> <span class="o">=</span> <span class="n">worker_executing_project</span></div>
+
+
+<div class="viewcode-block" id="AuthenticationException"><a class="viewcode-back" href="../../../../apache_beam.internal.gcp.auth.html#apache_beam.internal.gcp.auth.AuthenticationException">[docs]</a><span class="k">class</span> <span class="nc">AuthenticationException</span><span class="p">(</span><span class="n">retry</span><span class="o">.</span><span class="n">PermanentException</span><span class="p">):</span>
+ <span class="k">pass</span></div>
+
+
+<span class="k">class</span> <span class="nc">_GCEMetadataCredentials</span><span class="p">(</span><span class="n">OAuth2Credentials</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> Credential object initialized using access token from GCE VM metadata."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">user_agent</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+ <span class="sd">"""Create an instance of _GCEMetadataCredentials.</span>
+
+<span class="sd"> These credentials are generated by contacting the metadata server on a GCE</span>
+<span class="sd"> VM instance.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> user_agent: string, The HTTP User-Agent to provide for this application.</span>
+<span class="sd"> """</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">_GCEMetadataCredentials</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span>
+ <span class="kc">None</span><span class="p">,</span> <span class="c1"># access_token</span>
+ <span class="kc">None</span><span class="p">,</span> <span class="c1"># client_id</span>
+ <span class="kc">None</span><span class="p">,</span> <span class="c1"># client_secret</span>
+ <span class="kc">None</span><span class="p">,</span> <span class="c1"># refresh_token</span>
+ <span class="n">datetime</span><span class="o">.</span><span class="n">datetime</span><span class="p">(</span><span class="mi">2010</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="c1"># token_expiry, set to time in past.</span>
+ <span class="kc">None</span><span class="p">,</span> <span class="c1"># token_uri</span>
+ <span class="n">user_agent</span><span class="p">)</span>
+
+ <span class="nd">@retry</span><span class="o">.</span><span class="n">with_exponential_backoff</span><span class="p">(</span>
+ <span class="n">retry_filter</span><span class="o">=</span><span class="n">retry</span><span class="o">.</span><span class="n">retry_on_server_errors_and_timeout_filter</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">_refresh</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">http_request</span><span class="p">):</span>
+ <span class="n">refresh_time</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+ <span class="n">metadata_root</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span>
+ <span class="s1">'GCE_METADATA_ROOT'</span><span class="p">,</span> <span class="s1">'metadata.google.internal'</span><span class="p">)</span>
+ <span class="n">token_url</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'http://</span><span class="si">{}</span><span class="s1">/computeMetadata/v1/instance/service-accounts/'</span>
+ <span class="s1">'default/token'</span><span class="p">)</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">metadata_root</span><span class="p">)</span>
+ <span class="n">req</span> <span class="o">=</span> <span class="n">Request</span><span class="p">(</span><span class="n">token_url</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="p">{</span><span class="s1">'Metadata-Flavor'</span><span class="p">:</span> <span class="s1">'Google'</span><span class="p">})</span>
+ <span class="n">token_data</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">urlopen</span><span class="p">(</span><span class="n">req</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">access_token</span> <span class="o">=</span> <span class="n">token_data</span><span class="p">[</span><span class="s1">'access_token'</span><span class="p">]</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">token_expiry</span> <span class="o">=</span> <span class="p">(</span><span class="n">refresh_time</span> <span class="o">+</span>
+ <span class="n">datetime</span><span class="o">.</span><span class="n">timedelta</span><span class="p">(</span><span class="n">seconds</span><span class="o">=</span><span class="n">token_data</span><span class="p">[</span><span class="s1">'expires_in'</span><span class="p">]))</span>
+
+
+<div class="viewcode-block" id="get_service_credentials"><a class="viewcode-back" href="../../../../apache_beam.internal.gcp.auth.html#apache_beam.internal.gcp.auth.get_service_credentials">[docs]</a><span class="k">def</span> <span class="nf">get_service_credentials</span><span class="p">():</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> Get credentials to access Google services."""</span>
+ <span class="n">user_agent</span> <span class="o">=</span> <span class="s1">'beam-python-sdk/1.0'</span>
+ <span class="k">if</span> <span class="n">is_running_in_gce</span><span class="p">:</span>
+ <span class="c1"># We are currently running as a GCE taskrunner worker.</span>
+ <span class="c1">#</span>
+ <span class="c1"># TODO(ccy): It's not entirely clear if these credentials are thread-safe.</span>
+ <span class="c1"># If so, we can cache these credentials to save the overhead of creating</span>
+ <span class="c1"># them again.</span>
+ <span class="k">return</span> <span class="n">_GCEMetadataCredentials</span><span class="p">(</span><span class="n">user_agent</span><span class="o">=</span><span class="n">user_agent</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">client_scopes</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="s1">'https://www.googleapis.com/auth/bigquery'</span><span class="p">,</span>
+ <span class="s1">'https://www.googleapis.com/auth/cloud-platform'</span><span class="p">,</span>
+ <span class="s1">'https://www.googleapis.com/auth/devstorage.full_control'</span><span class="p">,</span>
+ <span class="s1">'https://www.googleapis.com/auth/userinfo.email'</span><span class="p">,</span>
+ <span class="s1">'https://www.googleapis.com/auth/datastore'</span>
+ <span class="p">]</span>
+
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">credentials</span> <span class="o">=</span> <span class="n">GoogleCredentials</span><span class="o">.</span><span class="n">get_application_default</span><span class="p">()</span>
+ <span class="n">credentials</span> <span class="o">=</span> <span class="n">credentials</span><span class="o">.</span><span class="n">create_scoped</span><span class="p">(</span><span class="n">client_scopes</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'Connecting using Google Application Default '</span>
+ <span class="s1">'Credentials.'</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">credentials</span>
+ <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span>
+ <span class="s1">'Unable to find default credentials to use: </span><span class="si">%s</span><span class="se">\n</span><span class="s1">'</span>
+ <span class="s1">'Connecting anonymously.'</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
+ <span class="k">return</span> <span class="kc">None</span></div>
+</pre></div>
+
+ </div>
+ <div class="articleComments">
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright .
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true,
+ SOURCELINK_SUFFIX: '.txt'
+ };
+ </script>
+ <script type="text/javascript" src="../../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/internal/gcp/json_value.html b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/internal/gcp/json_value.html
new file mode 100644
index 0000000..94b0bd9
--- /dev/null
+++ b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/internal/gcp/json_value.html
@@ -0,0 +1,397 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>apache_beam.internal.gcp.json_value — Apache Beam documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="index" title="Index"
+ href="../../../../genindex.html"/>
+ <link rel="search" title="Search" href="../../../../search.html"/>
+ <link rel="top" title="Apache Beam documentation" href="../../../../index.html"/>
+ <link rel="up" title="Module code" href="../../../index.html"/>
+
+
+ <script src="../../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../../index.html" class="icon icon-home"> Apache Beam
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.coders.html">apache_beam.coders package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.internal.html">apache_beam.internal package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.io.html">apache_beam.io package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.metrics.html">apache_beam.metrics package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.options.html">apache_beam.options package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.portability.html">apache_beam.portability package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.runners.html">apache_beam.runners package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.testing.html">apache_beam.testing package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.transforms.html">apache_beam.transforms package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.typehints.html">apache_beam.typehints package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.utils.html">apache_beam.utils package</a></li>
+</ul>
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.error.html">apache_beam.error module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.version.html">apache_beam.version module</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../../index.html">Apache Beam</a>
+
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+
+ <ul class="wy-breadcrumbs">
+
+ <li><a href="../../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../../index.html">Module code</a> »</li>
+
+ <li>apache_beam.internal.gcp.json_value</li>
+
+
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+
+ </ul>
+
+
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for apache_beam.internal.gcp.json_value</h1><div class="highlight"><pre>
+<span></span><span class="c1">#</span>
+<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
+<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
+<span class="c1"># this work for additional information regarding copyright ownership.</span>
+<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
+<span class="c1"># (the "License"); you may not use this file except in compliance with</span>
+<span class="c1"># the License. You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="c1">#</span>
+
+<span class="sd">"""JSON conversion utility functions."""</span>
+
+<span class="c1"># Protect against environments where apitools library is not available.</span>
+<span class="c1"># pylint: disable=wrong-import-order, wrong-import-position</span>
+<span class="k">try</span><span class="p">:</span>
+ <span class="kn">from</span> <span class="nn">apitools.base.py</span> <span class="k">import</span> <span class="n">extra_types</span>
+<span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
+ <span class="n">extra_types</span> <span class="o">=</span> <span class="kc">None</span>
+<span class="c1"># pylint: enable=wrong-import-order, wrong-import-position</span>
+
+<span class="kn">import</span> <span class="nn">six</span>
+
+<span class="kn">from</span> <span class="nn">apache_beam.options.value_provider</span> <span class="k">import</span> <span class="n">ValueProvider</span>
+
+<span class="n">_MAXINT64</span> <span class="o">=</span> <span class="p">(</span><span class="mi">1</span> <span class="o">&lt;&lt;</span> <span class="mi">63</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span>
+<span class="n">_MININT64</span> <span class="o">=</span> <span class="o">-</span> <span class="p">(</span><span class="mi">1</span> <span class="o">&lt;&lt;</span> <span class="mi">63</span><span class="p">)</span>
+
+
+<div class="viewcode-block" id="get_typed_value_descriptor"><a class="viewcode-back" href="../../../../apache_beam.internal.gcp.json_value.html#apache_beam.internal.gcp.json_value.get_typed_value_descriptor">[docs]</a><span class="k">def</span> <span class="nf">get_typed_value_descriptor</span><span class="p">(</span><span class="n">obj</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> Converts a basic type into a @type/value dictionary.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> obj: A basestring, bool, int, or float to be converted.</span>
+
+<span class="sd"> Returns:</span>
+<span class="sd"> A dictionary containing the keys ``@type`` and ``value`` with the value for</span>
+<span class="sd"> the ``@type`` of appropriate type.</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ~exceptions.TypeError: if the Python object has a type that is not</span>
+<span class="sd"> supported.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="n">six</span><span class="o">.</span><span class="n">string_types</span><span class="p">):</span>
+ <span class="n">type_name</span> <span class="o">=</span> <span class="s1">'Text'</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="nb">bool</span><span class="p">):</span>
+ <span class="n">type_name</span> <span class="o">=</span> <span class="s1">'Boolean'</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
+ <span class="n">type_name</span> <span class="o">=</span> <span class="s1">'Integer'</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span>
+ <span class="n">type_name</span> <span class="o">=</span> <span class="s1">'Float'</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'Cannot get a type descriptor for </span><span class="si">%s</span><span class="s1">.'</span> <span class="o">%</span> <span class="nb">repr</span><span class="p">(</span><span class="n">obj</span><span class="p">))</span>
+ <span class="k">return</span> <span class="p">{</span><span class="s1">'@type'</span><span class="p">:</span> <span class="s1">'http://schema.org/</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">type_name</span><span class="p">,</span> <span class="s1">'value'</span><span class="p">:</span> <span class="n">obj</span><span class="p">}</span></div>
+
+
+<div class="viewcode-block" id="to_json_value"><a class="viewcode-back" href="../../../../apache_beam.internal.gcp.json_value.html#apache_beam.internal.gcp.json_value.to_json_value">[docs]</a><span class="k">def</span> <span class="nf">to_json_value</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="n">with_type</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> Converts Python objects into extra_types.JsonValue objects.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> obj: Python object to be converted. Can be :data:`None`.</span>
+<span class="sd"> with_type: If true then the basic types (``string``, ``int``, ``float``,</span>
+<span class="sd"> ``bool``) will be wrapped in ``@type:value`` dictionaries. Otherwise the</span>
+<span class="sd"> straight value is encoded into a ``JsonValue``.</span>
+
+<span class="sd"> Returns:</span>
+<span class="sd"> A ``JsonValue`` object using ``JsonValue``, ``JsonArray`` and ``JsonObject``</span>
+<span class="sd"> types for the corresponding values, lists, or dictionaries.</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ~exceptions.TypeError: if the Python object contains a type that is not</span>
+<span class="sd"> supported.</span>
+
+<span class="sd"> The types supported are ``str``, ``bool``, ``int``, ``float``, ``list``,</span>
+<span class="sd"> ``tuple``, ``dict``, and ``None``. The Dataflow API requires JsonValue(s) in</span>
+<span class="sd"> many places, and it is quite convenient to be able to specify these</span>
+<span class="sd"> hierarchical objects using Python syntax.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="n">obj</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">extra_types</span><span class="o">.</span><span class="n">JsonValue</span><span class="p">(</span><span class="n">is_null</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="p">(</span><span class="nb">list</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)):</span>
+ <span class="k">return</span> <span class="n">extra_types</span><span class="o">.</span><span class="n">JsonValue</span><span class="p">(</span>
+ <span class="n">array_value</span><span class="o">=</span><span class="n">extra_types</span><span class="o">.</span><span class="n">JsonArray</span><span class="p">(</span>
+ <span class="n">entries</span><span class="o">=</span><span class="p">[</span><span class="n">to_json_value</span><span class="p">(</span><span class="n">o</span><span class="p">,</span> <span class="n">with_type</span><span class="o">=</span><span class="n">with_type</span><span class="p">)</span> <span class="k">for</span> <span class="n">o</span> <span class="ow">in</span> <span class="n">obj</span><span class="p">]))</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
+ <span class="n">json_object</span> <span class="o">=</span> <span class="n">extra_types</span><span class="o">.</span><span class="n">JsonObject</span><span class="p">()</span>
+ <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">obj</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
+ <span class="n">json_object</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
+ <span class="n">extra_types</span><span class="o">.</span><span class="n">JsonObject</span><span class="o">.</span><span class="n">Property</span><span class="p">(</span>
+ <span class="n">key</span><span class="o">=</span><span class="n">k</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">to_json_value</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="n">with_type</span><span class="o">=</span><span class="n">with_type</span><span class="p">)))</span>
+ <span class="k">return</span> <span class="n">extra_types</span><span class="o">.</span><span class="n">JsonValue</span><span class="p">(</span><span class="n">object_value</span><span class="o">=</span><span class="n">json_object</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="n">with_type</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">to_json_value</span><span class="p">(</span><span class="n">get_typed_value_descriptor</span><span class="p">(</span><span class="n">obj</span><span class="p">),</span> <span class="n">with_type</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="n">six</span><span class="o">.</span><span class="n">string_types</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">extra_types</span><span class="o">.</span><span class="n">JsonValue</span><span class="p">(</span><span class="n">string_value</span><span class="o">=</span><span class="n">obj</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="nb">bool</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">extra_types</span><span class="o">.</span><span class="n">JsonValue</span><span class="p">(</span><span class="n">boolean_value</span><span class="o">=</span><span class="n">obj</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="n">six</span><span class="o">.</span><span class="n">integer_types</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">_MININT64</span> <span class="o">&lt;=</span> <span class="n">obj</span> <span class="o">&lt;=</span> <span class="n">_MAXINT64</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">extra_types</span><span class="o">.</span><span class="n">JsonValue</span><span class="p">(</span><span class="n">integer_value</span><span class="o">=</span><span class="n">obj</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'Can not encode </span><span class="si">{}</span><span class="s1"> as a 64-bit integer'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">obj</span><span class="p">))</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">extra_types</span><span class="o">.</span><span class="n">JsonValue</span><span class="p">(</span><span class="n">double_value</span><span class="o">=</span><span class="n">obj</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="n">ValueProvider</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">obj</span><span class="o">.</span><span class="n">is_accessible</span><span class="p">():</span>
+ <span class="k">return</span> <span class="n">to_json_value</span><span class="p">(</span><span class="n">obj</span><span class="o">.</span><span class="n">get</span><span class="p">())</span>
+ <span class="k">return</span> <span class="n">extra_types</span><span class="o">.</span><span class="n">JsonValue</span><span class="p">(</span><span class="n">is_null</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'Cannot convert </span><span class="si">%s</span><span class="s1"> to a JSON value.'</span> <span class="o">%</span> <span class="nb">repr</span><span class="p">(</span><span class="n">obj</span><span class="p">))</span></div>
+
+
+<div class="viewcode-block" id="from_json_value"><a class="viewcode-back" href="../../../../apache_beam.internal.gcp.json_value.html#apache_beam.internal.gcp.json_value.from_json_value">[docs]</a><span class="k">def</span> <span class="nf">from_json_value</span><span class="p">(</span><span class="n">v</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> Converts ``extra_types.JsonValue`` objects into Python objects.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> v: ``JsonValue`` object to be converted.</span>
+
+<span class="sd"> Returns:</span>
+<span class="sd"> A Python object structured as values, lists, and dictionaries corresponding</span>
+<span class="sd"> to ``JsonValue``, ``JsonArray`` and ``JsonObject`` types.</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ~exceptions.TypeError: if the ``JsonValue`` object contains a type that is</span>
+<span class="sd"> not supported.</span>
+
+<span class="sd"> The types supported are ``str``, ``bool``, ``int``, ``float``, ``list``,</span>
+<span class="sd"> ``dict``, and ``None``. The Dataflow API returns JsonValue(s) in many places</span>
+<span class="sd"> and it is quite convenient to be able to convert these hierarchical objects</span>
+<span class="sd"> to much simpler Python objects.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="n">extra_types</span><span class="o">.</span><span class="n">JsonValue</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">v</span><span class="o">.</span><span class="n">string_value</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">v</span><span class="o">.</span><span class="n">string_value</span>
+ <span class="k">elif</span> <span class="n">v</span><span class="o">.</span><span class="n">boolean_value</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">v</span><span class="o">.</span><span class="n">boolean_value</span>
+ <span class="k">elif</span> <span class="n">v</span><span class="o">.</span><span class="n">integer_value</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">v</span><span class="o">.</span><span class="n">integer_value</span>
+ <span class="k">elif</span> <span class="n">v</span><span class="o">.</span><span class="n">double_value</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">v</span><span class="o">.</span><span class="n">double_value</span>
+ <span class="k">elif</span> <span class="n">v</span><span class="o">.</span><span class="n">array_value</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">from_json_value</span><span class="p">(</span><span class="n">v</span><span class="o">.</span><span class="n">array_value</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="n">v</span><span class="o">.</span><span class="n">object_value</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">from_json_value</span><span class="p">(</span><span class="n">v</span><span class="o">.</span><span class="n">object_value</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="n">v</span><span class="o">.</span><span class="n">is_null</span><span class="p">:</span>
+ <span class="k">return</span> <span class="kc">None</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="n">extra_types</span><span class="o">.</span><span class="n">JsonArray</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">[</span><span class="n">from_json_value</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">v</span><span class="o">.</span><span class="n">entries</span><span class="p">]</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="n">extra_types</span><span class="o">.</span><span class="n">JsonObject</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">{</span><span class="n">p</span><span class="o">.</span><span class="n">key</span><span class="p">:</span> <span class="n">from_json_value</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">v</span><span class="o">.</span><span class="n">properties</span><span class="p">}</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'Cannot convert </span><span class="si">%s</span><span class="s1"> from a JSON value.'</span> <span class="o">%</span> <span class="nb">repr</span><span class="p">(</span><span class="n">v</span><span class="p">))</span></div>
+</pre></div>
+
+ </div>
+ <div class="articleComments">
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright .
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true,
+ SOURCELINK_SUFFIX: '.txt'
+ };
+ </script>
+ <script type="text/javascript" src="../../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/internal/pickler.html b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/internal/pickler.html
new file mode 100644
index 0000000..81d28a9
--- /dev/null
+++ b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/internal/pickler.html
@@ -0,0 +1,481 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>apache_beam.internal.pickler — Apache Beam documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="index" title="Index"
+ href="../../../genindex.html"/>
+ <link rel="search" title="Search" href="../../../search.html"/>
+ <link rel="top" title="Apache Beam documentation" href="../../../index.html"/>
+ <link rel="up" title="Module code" href="../../index.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Apache Beam
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.coders.html">apache_beam.coders package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.internal.html">apache_beam.internal package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.io.html">apache_beam.io package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.metrics.html">apache_beam.metrics package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.options.html">apache_beam.options package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.portability.html">apache_beam.portability package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.runners.html">apache_beam.runners package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.testing.html">apache_beam.testing package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.transforms.html">apache_beam.transforms package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.typehints.html">apache_beam.typehints package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.utils.html">apache_beam.utils package</a></li>
+</ul>
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.error.html">apache_beam.error module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.version.html">apache_beam.version module</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Apache Beam</a>
+
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+
+ <ul class="wy-breadcrumbs">
+
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li>apache_beam.internal.pickler</li>
+
+
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+
+ </ul>
+
+
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for apache_beam.internal.pickler</h1><div class="highlight"><pre>
+<span></span><span class="c1">#</span>
+<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
+<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
+<span class="c1"># this work for additional information regarding copyright ownership.</span>
+<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
+<span class="c1"># (the "License"); you may not use this file except in compliance with</span>
+<span class="c1"># the License. You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="c1">#</span>
+
+<span class="sd">"""Pickler for values, functions, and classes.</span>
+
+<span class="sd">For internal use only. No backwards compatibility guarantees.</span>
+
+<span class="sd">Pickles created by the pickling library contain non-ASCII characters, so</span>
+<span class="sd">we base64-encode the results so that we can put them in JSON objects.</span>
+<span class="sd">The pickler is used to embed FlatMap callable objects into the workflow JSON</span>
+<span class="sd">description.</span>
+
+<span class="sd">The pickler module should be used to pickle functions and modules; for values,</span>
+<span class="sd">the coders.*PickleCoder classes should be used instead.</span>
+<span class="sd">"""</span>
+
+<span class="kn">import</span> <span class="nn">base64</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">import</span> <span class="nn">sys</span>
+<span class="kn">import</span> <span class="nn">traceback</span>
+<span class="kn">import</span> <span class="nn">types</span>
+<span class="kn">import</span> <span class="nn">zlib</span>
+
+<span class="kn">import</span> <span class="nn">dill</span>
+
+
+<span class="k">def</span> <span class="nf">_is_nested_class</span><span class="p">(</span><span class="bp">cls</span><span class="p">):</span>
+ <span class="sd">"""Returns true if argument is a class object that appears to be nested."""</span>
+ <span class="k">return</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="nb">type</span><span class="p">)</span>
+ <span class="ow">and</span> <span class="bp">cls</span><span class="o">.</span><span class="vm">__module__</span> <span class="o">!=</span> <span class="s1">'__builtin__'</span>
+ <span class="ow">and</span> <span class="bp">cls</span><span class="o">.</span><span class="vm">__name__</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">sys</span><span class="o">.</span><span class="n">modules</span><span class="p">[</span><span class="bp">cls</span><span class="o">.</span><span class="vm">__module__</span><span class="p">]</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)</span>
+
+
+<span class="k">def</span> <span class="nf">_find_containing_class</span><span class="p">(</span><span class="n">nested_class</span><span class="p">):</span>
+ <span class="sd">"""Finds containing class of a nested class passed as argument."""</span>
+
+ <span class="k">def</span> <span class="nf">_find_containing_class_inner</span><span class="p">(</span><span class="n">outer</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">outer</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
+ <span class="k">if</span> <span class="n">v</span> <span class="ow">is</span> <span class="n">nested_class</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">outer</span><span class="p">,</span> <span class="n">k</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="nb">type</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="s1">'__dict__'</span><span class="p">):</span>
+ <span class="n">res</span> <span class="o">=</span> <span class="n">_find_containing_class_inner</span><span class="p">(</span><span class="n">v</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">res</span><span class="p">:</span> <span class="k">return</span> <span class="n">res</span>
+
+ <span class="k">return</span> <span class="n">_find_containing_class_inner</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">modules</span><span class="p">[</span><span class="n">nested_class</span><span class="o">.</span><span class="vm">__module__</span><span class="p">])</span>
+
+
+<span class="k">def</span> <span class="nf">_nested_type_wrapper</span><span class="p">(</span><span class="n">fun</span><span class="p">):</span>
+ <span class="sd">"""A wrapper for the standard pickler handler for class objects.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> fun: Original pickler handler for type objects.</span>
+
+<span class="sd"> Returns:</span>
+<span class="sd"> A wrapper for type objects that handles nested classes.</span>
+
+<span class="sd"> The wrapper detects if an object being pickled is a nested class object.</span>
+<span class="sd"> For nested class object only it will save the containing class object so</span>
+<span class="sd"> the nested structure is recreated during unpickle.</span>
+<span class="sd"> """</span>
+
+ <span class="k">def</span> <span class="nf">wrapper</span><span class="p">(</span><span class="n">pickler</span><span class="p">,</span> <span class="n">obj</span><span class="p">):</span>
+ <span class="c1"># When the nested class is defined in the __main__ module we do not have to</span>
+ <span class="c1"># do anything special because the pickler itself will save the constituent</span>
+ <span class="c1"># parts of the type (i.e., name, base classes, dictionary) and then</span>
+ <span class="c1"># recreate it during unpickling.</span>
+ <span class="k">if</span> <span class="n">_is_nested_class</span><span class="p">(</span><span class="n">obj</span><span class="p">)</span> <span class="ow">and</span> <span class="n">obj</span><span class="o">.</span><span class="vm">__module__</span> <span class="o">!=</span> <span class="s1">'__main__'</span><span class="p">:</span>
+ <span class="n">containing_class_and_name</span> <span class="o">=</span> <span class="n">_find_containing_class</span><span class="p">(</span><span class="n">obj</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">containing_class_and_name</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">pickler</span><span class="o">.</span><span class="n">save_reduce</span><span class="p">(</span>
+ <span class="nb">getattr</span><span class="p">,</span> <span class="n">containing_class_and_name</span><span class="p">,</span> <span class="n">obj</span><span class="o">=</span><span class="n">obj</span><span class="p">)</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">fun</span><span class="p">(</span><span class="n">pickler</span><span class="p">,</span> <span class="n">obj</span><span class="p">)</span>
+ <span class="k">except</span> <span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">PicklingError</span><span class="p">:</span>
+ <span class="c1"># pylint: disable=protected-access</span>
+ <span class="k">return</span> <span class="n">pickler</span><span class="o">.</span><span class="n">save_reduce</span><span class="p">(</span>
+ <span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">_create_type</span><span class="p">,</span>
+ <span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">obj</span><span class="p">),</span> <span class="n">obj</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span> <span class="n">obj</span><span class="o">.</span><span class="vm">__bases__</span><span class="p">,</span>
+ <span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">_dict_from_dictproxy</span><span class="p">(</span><span class="n">obj</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)),</span>
+ <span class="n">obj</span><span class="o">=</span><span class="n">obj</span><span class="p">)</span>
+ <span class="c1"># pylint: enable=protected-access</span>
+
+ <span class="k">return</span> <span class="n">wrapper</span>
+
+
+<span class="c1"># Monkey patch the standard pickler dispatch table entry for type objects.</span>
+<span class="c1"># Dill, for certain types, defers to the standard pickler (including type</span>
+<span class="c1"># objects). We wrap the standard handler using _nested_type_wrapper() because</span>
+<span class="c1"># for nested classes we want to pickle the actual enclosing class object so we</span>
+<span class="c1"># can recreate it during unpickling.</span>
+<span class="c1"># TODO(silviuc): Make sure we submit the fix upstream to GitHub dill project.</span>
+<span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">Pickler</span><span class="o">.</span><span class="n">dispatch</span><span class="p">[</span><span class="nb">type</span><span class="p">]</span> <span class="o">=</span> <span class="n">_nested_type_wrapper</span><span class="p">(</span>
+ <span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">Pickler</span><span class="o">.</span><span class="n">dispatch</span><span class="p">[</span><span class="nb">type</span><span class="p">])</span>
+
+
+<span class="c1"># Dill pickles generator objects without complaint, but unpickling produces</span>
+<span class="c1"># TypeError: object.__new__(generator) is not safe, use generator.__new__()</span>
+<span class="c1"># on some versions of Python.</span>
+<span class="k">def</span> <span class="nf">_reject_generators</span><span class="p">(</span><span class="n">unused_pickler</span><span class="p">,</span> <span class="n">unused_obj</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">"can't (safely) pickle generator objects"</span><span class="p">)</span>
+
+
+<span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">Pickler</span><span class="o">.</span><span class="n">dispatch</span><span class="p">[</span><span class="n">types</span><span class="o">.</span><span class="n">GeneratorType</span><span class="p">]</span> <span class="o">=</span> <span class="n">_reject_generators</span>
+
+
+<span class="c1"># This if guards against dill not being fully initialized when generating docs.</span>
+<span class="k">if</span> <span class="s1">'save_module'</span> <span class="ow">in</span> <span class="nb">dir</span><span class="p">(</span><span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="p">):</span>
+
+ <span class="c1"># Always pickle non-main modules by name.</span>
+ <span class="n">old_save_module</span> <span class="o">=</span> <span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">save_module</span>
+
+<div class="viewcode-block" id="save_module"><a class="viewcode-back" href="../../../apache_beam.internal.pickler.html#apache_beam.internal.pickler.save_module">[docs]</a> <span class="nd">@dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">register</span><span class="p">(</span><span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">ModuleType</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">save_module</span><span class="p">(</span><span class="n">pickler</span><span class="p">,</span> <span class="n">obj</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">is_dill</span><span class="p">(</span><span class="n">pickler</span><span class="p">)</span> <span class="ow">and</span> <span class="n">obj</span> <span class="ow">is</span> <span class="n">pickler</span><span class="o">.</span><span class="n">_main</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">old_save_module</span><span class="p">(</span><span class="n">pickler</span><span class="p">,</span> <span class="n">obj</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'M2: </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">obj</span><span class="p">)</span>
+ <span class="c1"># pylint: disable=protected-access</span>
+ <span class="n">pickler</span><span class="o">.</span><span class="n">save_reduce</span><span class="p">(</span><span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">_import_module</span><span class="p">,</span> <span class="p">(</span><span class="n">obj</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,),</span> <span class="n">obj</span><span class="o">=</span><span class="n">obj</span><span class="p">)</span>
+ <span class="c1"># pylint: enable=protected-access</span>
+ <span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'# M2'</span><span class="p">)</span></div>
+
+ <span class="c1"># Pickle module dictionaries (commonly found in lambda's globals)</span>
+ <span class="c1"># by referencing their module.</span>
+ <span class="n">old_save_module_dict</span> <span class="o">=</span> <span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">save_module_dict</span>
+ <span class="n">known_module_dicts</span> <span class="o">=</span> <span class="p">{}</span>
+
+<div class="viewcode-block" id="new_save_module_dict"><a class="viewcode-back" href="../../../apache_beam.internal.pickler.html#apache_beam.internal.pickler.new_save_module_dict">[docs]</a> <span class="nd">@dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">register</span><span class="p">(</span><span class="nb">dict</span><span class="p">)</span>
+ <span class="k">def</span> <span class="nf">new_save_module_dict</span><span class="p">(</span><span class="n">pickler</span><span class="p">,</span> <span class="n">obj</span><span class="p">):</span>
+ <span class="n">obj_id</span> <span class="o">=</span> <span class="nb">id</span><span class="p">(</span><span class="n">obj</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">known_module_dicts</span> <span class="ow">or</span> <span class="s1">'__file__'</span> <span class="ow">in</span> <span class="n">obj</span> <span class="ow">or</span> <span class="s1">'__package__'</span> <span class="ow">in</span> <span class="n">obj</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">obj_id</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">known_module_dicts</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">sys</span><span class="o">.</span><span class="n">modules</span><span class="o">.</span><span class="n">values</span><span class="p">():</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">m</span> <span class="ow">and</span> <span class="n">m</span><span class="o">.</span><span class="vm">__name__</span> <span class="o">!=</span> <span class="s1">'__main__'</span><span class="p">:</span>
+ <span class="n">d</span> <span class="o">=</span> <span class="n">m</span><span class="o">.</span><span class="vm">__dict__</span>
+ <span class="n">known_module_dicts</span><span class="p">[</span><span class="nb">id</span><span class="p">(</span><span class="n">d</span><span class="p">)]</span> <span class="o">=</span> <span class="n">m</span><span class="p">,</span> <span class="n">d</span>
+ <span class="k">except</span> <span class="ne">AttributeError</span><span class="p">:</span>
+ <span class="c1"># Skip modules that do not have the __name__ attribute.</span>
+ <span class="k">pass</span>
+ <span class="k">if</span> <span class="n">obj_id</span> <span class="ow">in</span> <span class="n">known_module_dicts</span> <span class="ow">and</span> <span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">is_dill</span><span class="p">(</span><span class="n">pickler</span><span class="p">):</span>
+ <span class="n">m</span> <span class="o">=</span> <span class="n">known_module_dicts</span><span class="p">[</span><span class="n">obj_id</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="c1"># pylint: disable=protected-access</span>
+ <span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">_import_module</span><span class="p">(</span><span class="n">m</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">pickler</span><span class="o">.</span><span class="n">save_reduce</span><span class="p">(</span>
+ <span class="nb">getattr</span><span class="p">,</span> <span class="p">(</span><span class="n">known_module_dicts</span><span class="p">[</span><span class="n">obj_id</span><span class="p">][</span><span class="mi">0</span><span class="p">],</span> <span class="s1">'__dict__'</span><span class="p">),</span> <span class="n">obj</span><span class="o">=</span><span class="n">obj</span><span class="p">)</span>
+ <span class="k">except</span> <span class="p">(</span><span class="ne">ImportError</span><span class="p">,</span> <span class="ne">AttributeError</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">old_save_module_dict</span><span class="p">(</span><span class="n">pickler</span><span class="p">,</span> <span class="n">obj</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">old_save_module_dict</span><span class="p">(</span><span class="n">pickler</span><span class="p">,</span> <span class="n">obj</span><span class="p">)</span></div>
+ <span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">save_module_dict</span> <span class="o">=</span> <span class="n">new_save_module_dict</span>
+
+ <span class="k">def</span> <span class="nf">_nest_dill_logging</span><span class="p">():</span>
+ <span class="sd">"""Prefix all dill logging with its depth in the callstack.</span>
+
+<span class="sd"> Useful for debugging pickling of deeply nested structures.</span>
+<span class="sd"> """</span>
+ <span class="n">old_log_info</span> <span class="o">=</span> <span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span>
+
+ <span class="k">def</span> <span class="nf">new_log_info</span><span class="p">(</span><span class="n">msg</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="n">old_log_info</span><span class="p">(</span>
+ <span class="p">(</span><span class="s1">'1 2 3 4 5 6 7 8 9 0 '</span> <span class="o">*</span> <span class="mi">10</span><span class="p">)[:</span><span class="nb">len</span><span class="p">(</span><span class="n">traceback</span><span class="o">.</span><span class="n">extract_stack</span><span class="p">())]</span> <span class="o">+</span> <span class="n">msg</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span> <span class="o">=</span> <span class="n">new_log_info</span>
+
+
+<span class="c1"># Turn off verbose logging from the dill pickler.</span>
+<span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="s1">'dill'</span><span class="p">)</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">WARN</span><span class="p">)</span>
+
+
+<span class="c1"># TODO(ccy): Currently, there are still instances of pickler.dumps() and</span>
+<span class="c1"># pickler.loads() being used for data, which results in an unnecessary base64</span>
+<span class="c1"># encoding. This should be cleaned up.</span>
+<div class="viewcode-block" id="dumps"><a class="viewcode-back" href="../../../apache_beam.internal.pickler.html#apache_beam.internal.pickler.dumps">[docs]</a><span class="k">def</span> <span class="nf">dumps</span><span class="p">(</span><span class="n">o</span><span class="p">,</span> <span class="n">enable_trace</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees."""</span>
+
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">s</span> <span class="o">=</span> <span class="n">dill</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">o</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span> <span class="c1"># pylint: disable=broad-except</span>
+ <span class="k">if</span> <span class="n">enable_trace</span><span class="p">:</span>
+ <span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">_trace</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span> <span class="c1"># pylint: disable=protected-access</span>
+ <span class="n">s</span> <span class="o">=</span> <span class="n">dill</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">o</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span>
+ <span class="k">finally</span><span class="p">:</span>
+ <span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">_trace</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span> <span class="c1"># pylint: disable=protected-access</span>
+
+ <span class="c1"># Compress as compactly as possible to decrease peak memory usage (of multiple</span>
+ <span class="c1"># in-memory copies) and free up some possibly large and no-longer-needed</span>
+ <span class="c1"># memory.</span>
+ <span class="n">c</span> <span class="o">=</span> <span class="n">zlib</span><span class="o">.</span><span class="n">compress</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="mi">9</span><span class="p">)</span>
+ <span class="k">del</span> <span class="n">s</span>
+
+ <span class="k">return</span> <span class="n">base64</span><span class="o">.</span><span class="n">b64encode</span><span class="p">(</span><span class="n">c</span><span class="p">)</span></div>
+
+
+<div class="viewcode-block" id="loads"><a class="viewcode-back" href="../../../apache_beam.internal.pickler.html#apache_beam.internal.pickler.loads">[docs]</a><span class="k">def</span> <span class="nf">loads</span><span class="p">(</span><span class="n">encoded</span><span class="p">,</span> <span class="n">enable_trace</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees."""</span>
+
+ <span class="n">c</span> <span class="o">=</span> <span class="n">base64</span><span class="o">.</span><span class="n">b64decode</span><span class="p">(</span><span class="n">encoded</span><span class="p">)</span>
+
+ <span class="n">s</span> <span class="o">=</span> <span class="n">zlib</span><span class="o">.</span><span class="n">decompress</span><span class="p">(</span><span class="n">c</span><span class="p">)</span>
+ <span class="k">del</span> <span class="n">c</span> <span class="c1"># Free up some possibly large and no-longer-needed memory.</span>
+
+ <span class="k">try</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">dill</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">s</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span> <span class="c1"># pylint: disable=broad-except</span>
+ <span class="k">if</span> <span class="n">enable_trace</span><span class="p">:</span>
+ <span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">_trace</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span> <span class="c1"># pylint: disable=protected-access</span>
+ <span class="k">return</span> <span class="n">dill</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">s</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span>
+ <span class="k">finally</span><span class="p">:</span>
+ <span class="n">dill</span><span class="o">.</span><span class="n">dill</span><span class="o">.</span><span class="n">_trace</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span> <span class="c1"># pylint: disable=protected-access</span></div>
+
+
+<div class="viewcode-block" id="dump_session"><a class="viewcode-back" href="../../../apache_beam.internal.pickler.html#apache_beam.internal.pickler.dump_session">[docs]</a><span class="k">def</span> <span class="nf">dump_session</span><span class="p">(</span><span class="n">file_path</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> Pickle the current python session to be used in the worker.</span>
+
+<span class="sd"> Note: Due to the inconsistency in the first dump of dill dump_session we</span>
+<span class="sd"> create and load the dump twice to have consistent results in the worker and</span>
+<span class="sd"> the running session. Check: https://github.com/uqfoundation/dill/issues/195</span>
+<span class="sd"> """</span>
+ <span class="n">dill</span><span class="o">.</span><span class="n">dump_session</span><span class="p">(</span><span class="n">file_path</span><span class="p">)</span>
+ <span class="n">dill</span><span class="o">.</span><span class="n">load_session</span><span class="p">(</span><span class="n">file_path</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">dill</span><span class="o">.</span><span class="n">dump_session</span><span class="p">(</span><span class="n">file_path</span><span class="p">)</span></div>
+
+
+<div class="viewcode-block" id="load_session"><a class="viewcode-back" href="../../../apache_beam.internal.pickler.html#apache_beam.internal.pickler.load_session">[docs]</a><span class="k">def</span> <span class="nf">load_session</span><span class="p">(</span><span class="n">file_path</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">dill</span><span class="o">.</span><span class="n">load_session</span><span class="p">(</span><span class="n">file_path</span><span class="p">)</span></div>
+</pre></div>
+
+ </div>
+ <div class="articleComments">
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright .
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true,
+ SOURCELINK_SUFFIX: '.txt'
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/internal/util.html b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/internal/util.html
new file mode 100644
index 0000000..7ff49c5
--- /dev/null
+++ b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/internal/util.html
@@ -0,0 +1,372 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>apache_beam.internal.util — Apache Beam documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="index" title="Index"
+ href="../../../genindex.html"/>
+ <link rel="search" title="Search" href="../../../search.html"/>
+ <link rel="top" title="Apache Beam documentation" href="../../../index.html"/>
+ <link rel="up" title="Module code" href="../../index.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Apache Beam
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.coders.html">apache_beam.coders package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.internal.html">apache_beam.internal package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.io.html">apache_beam.io package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.metrics.html">apache_beam.metrics package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.options.html">apache_beam.options package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.portability.html">apache_beam.portability package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.runners.html">apache_beam.runners package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.testing.html">apache_beam.testing package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.transforms.html">apache_beam.transforms package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.typehints.html">apache_beam.typehints package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.utils.html">apache_beam.utils package</a></li>
+</ul>
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.error.html">apache_beam.error module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.version.html">apache_beam.version module</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Apache Beam</a>
+
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+
+ <ul class="wy-breadcrumbs">
+
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li>apache_beam.internal.util</li>
+
+
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+
+ </ul>
+
+
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for apache_beam.internal.util</h1><div class="highlight"><pre>
+<span></span><span class="c1">#</span>
+<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
+<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
+<span class="c1"># this work for additional information regarding copyright ownership.</span>
+<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
+<span class="c1"># (the "License"); you may not use this file except in compliance with</span>
+<span class="c1"># the License. You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="c1">#</span>
+
+<span class="sd">"""Utility functions used throughout the package.</span>
+
+<span class="sd">For internal use only. No backwards compatibility guarantees.</span>
+<span class="sd">"""</span>
+
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">import</span> <span class="nn">threading</span>
+<span class="kn">import</span> <span class="nn">weakref</span>
+<span class="kn">from</span> <span class="nn">multiprocessing.pool</span> <span class="k">import</span> <span class="n">ThreadPool</span>
+
+
+<div class="viewcode-block" id="ArgumentPlaceholder"><a class="viewcode-back" href="../../../apache_beam.internal.util.html#apache_beam.internal.util.ArgumentPlaceholder">[docs]</a><span class="k">class</span> <span class="nc">ArgumentPlaceholder</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> A placeholder object replacing PValues in argument lists.</span>
+
+<span class="sd"> A Fn object can take any number of "side inputs", which are PValues that will</span>
+<span class="sd"> be evaluated during pipeline execution and will be provided to the function</span>
+<span class="sd"> at the moment of its execution as positional or keyword arguments.</span>
+
+<span class="sd"> This is used only internally and should never be used by user code. A custom</span>
+<span class="sd"> Fn object by the time it executes will have such values replaced with real</span>
+<span class="sd"> computed values.</span>
+<span class="sd"> """</span>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="sd">"""Tests for equality of two placeholder objects.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> other: Another placeholder object to compare to.</span>
+
+<span class="sd"> This method is used only for test code. All placeholder objects are</span>
+<span class="sd"> equal to each other.</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">ArgumentPlaceholder</span><span class="p">)</span></div>
+
+
+<div class="viewcode-block" id="remove_objects_from_args"><a class="viewcode-back" href="../../../apache_beam.internal.util.html#apache_beam.internal.util.remove_objects_from_args">[docs]</a><span class="k">def</span> <span class="nf">remove_objects_from_args</span><span class="p">(</span><span class="n">args</span><span class="p">,</span> <span class="n">kwargs</span><span class="p">,</span> <span class="n">pvalue_classes</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> Replaces all objects of a given type in args/kwargs with a placeholder.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> args: A list of positional arguments.</span>
+<span class="sd"> kwargs: A dictionary of keyword arguments.</span>
+<span class="sd"> pvalue_classes: A tuple of class objects representing the types of the</span>
+<span class="sd"> arguments that must be replaced with a placeholder value (instance of</span>
+<span class="sd"> ArgumentPlaceholder)</span>
+
+<span class="sd"> Returns:</span>
+<span class="sd"> A 3-tuple containing a modified list of positional arguments, a modified</span>
+<span class="sd"> dictionary of keyword arguments, and a list of all objects replaced with</span>
+<span class="sd"> a placeholder value.</span>
+<span class="sd"> """</span>
+ <span class="n">pvals</span> <span class="o">=</span> <span class="p">[]</span>
+
+ <span class="k">def</span> <span class="nf">swapper</span><span class="p">(</span><span class="n">value</span><span class="p">):</span>
+ <span class="n">pvals</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">ArgumentPlaceholder</span><span class="p">()</span>
+ <span class="n">new_args</span> <span class="o">=</span> <span class="p">[</span><span class="n">swapper</span><span class="p">(</span><span class="n">v</span><span class="p">)</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="n">pvalue_classes</span><span class="p">)</span> <span class="k">else</span> <span class="n">v</span> <span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">args</span><span class="p">]</span>
+ <span class="c1"># Make sure the order in which we process the dictionary keys is predictable</span>
+ <span class="c1"># by sorting the entries first. This will be important when putting back</span>
+ <span class="c1"># PValues.</span>
+ <span class="n">new_kwargs</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">((</span><span class="n">k</span><span class="p">,</span> <span class="n">swapper</span><span class="p">(</span><span class="n">v</span><span class="p">))</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="n">pvalue_classes</span><span class="p">)</span> <span class="k">else</span> <span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="n">v</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">kwargs</span><span class="o">.</span><span class="n">items</span><span class="p">()))</span>
+ <span class="k">return</span> <span class="p">(</span><span class="n">new_args</span><span class="p">,</span> <span class="n">new_kwargs</span><span class="p">,</span> <span class="n">pvals</span><span class="p">)</span></div>
+
+
+<div class="viewcode-block" id="insert_values_in_args"><a class="viewcode-back" href="../../../apache_beam.internal.util.html#apache_beam.internal.util.insert_values_in_args">[docs]</a><span class="k">def</span> <span class="nf">insert_values_in_args</span><span class="p">(</span><span class="n">args</span><span class="p">,</span> <span class="n">kwargs</span><span class="p">,</span> <span class="n">values</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> Replaces all placeholders in args/kwargs with actual values.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> args: A list of positional arguments.</span>
+<span class="sd"> kwargs: A dictionary of keyword arguments.</span>
+<span class="sd"> values: A list of values that will be used to replace placeholder values.</span>
+
+<span class="sd"> Returns:</span>
+<span class="sd"> A 2-tuple containing a modified list of positional arguments, and a</span>
+<span class="sd"> modified dictionary of keyword arguments.</span>
+<span class="sd"> """</span>
+ <span class="c1"># Use a local iterator so that we don't modify values.</span>
+ <span class="n">v_iter</span> <span class="o">=</span> <span class="nb">iter</span><span class="p">(</span><span class="n">values</span><span class="p">)</span>
+ <span class="n">new_args</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="nb">next</span><span class="p">(</span><span class="n">v_iter</span><span class="p">)</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="n">ArgumentPlaceholder</span><span class="p">)</span> <span class="k">else</span> <span class="n">arg</span>
+ <span class="k">for</span> <span class="n">arg</span> <span class="ow">in</span> <span class="n">args</span><span class="p">]</span>
+ <span class="n">new_kwargs</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span>
+ <span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="nb">next</span><span class="p">(</span><span class="n">v_iter</span><span class="p">))</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="n">ArgumentPlaceholder</span><span class="p">)</span> <span class="k">else</span> <span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="n">v</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">kwargs</span><span class="o">.</span><span class="n">items</span><span class="p">()))</span>
+ <span class="k">return</span> <span class="p">(</span><span class="n">new_args</span><span class="p">,</span> <span class="n">new_kwargs</span><span class="p">)</span></div>
+
+
+<div class="viewcode-block" id="run_using_threadpool"><a class="viewcode-back" href="../../../apache_beam.internal.util.html#apache_beam.internal.util.run_using_threadpool">[docs]</a><span class="k">def</span> <span class="nf">run_using_threadpool</span><span class="p">(</span><span class="n">fn_to_execute</span><span class="p">,</span> <span class="n">inputs</span><span class="p">,</span> <span class="n">pool_size</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> Runs the given function on given inputs using a thread pool.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> fn_to_execute: Function to execute</span>
+<span class="sd"> inputs: Inputs on which given function will be executed in parallel.</span>
+<span class="sd"> pool_size: Size of thread pool.</span>
+<span class="sd"> Returns:</span>
+<span class="sd"> Results retrieved after executing the given function on given inputs.</span>
+<span class="sd"> """</span>
+
+ <span class="c1"># ThreadPool crashes in old versions of Python (&lt; 2.7.5) if created</span>
+ <span class="c1"># from a child thread. (http://bugs.python.org/issue10015)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">threading</span><span class="o">.</span><span class="n">current_thread</span><span class="p">(),</span> <span class="s1">'_children'</span><span class="p">):</span>
+ <span class="n">threading</span><span class="o">.</span><span class="n">current_thread</span><span class="p">()</span><span class="o">.</span><span class="n">_children</span> <span class="o">=</span> <span class="n">weakref</span><span class="o">.</span><span class="n">WeakKeyDictionary</span><span class="p">()</span>
+ <span class="n">pool</span> <span class="o">=</span> <span class="n">ThreadPool</span><span class="p">(</span><span class="nb">min</span><span class="p">(</span><span class="n">pool_size</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">inputs</span><span class="p">)))</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="c1"># We record and reset logging level here since 'apitools' library Beam</span>
+ <span class="c1"># depends on updates the logging level when used with a threadpool -</span>
+ <span class="c1"># https://github.com/google/apitools/issues/141</span>
+ <span class="c1"># TODO: Remove this once above issue in 'apitools' is fixed.</span>
+ <span class="n">old_level</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">()</span><span class="o">.</span><span class="n">level</span>
+ <span class="k">return</span> <span class="n">pool</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">fn_to_execute</span><span class="p">,</span> <span class="n">inputs</span><span class="p">)</span>
+ <span class="k">finally</span><span class="p">:</span>
+ <span class="n">pool</span><span class="o">.</span><span class="n">terminate</span><span class="p">()</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">()</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">old_level</span><span class="p">)</span></div>
+</pre></div>
+
+ </div>
+ <div class="articleComments">
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright .
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true,
+ SOURCELINK_SUFFIX: '.txt'
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/avroio.html b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/avroio.html
new file mode 100644
index 0000000..a161887
--- /dev/null
+++ b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/avroio.html
@@ -0,0 +1,701 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>apache_beam.io.avroio — Apache Beam documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="index" title="Index"
+ href="../../../genindex.html"/>
+ <link rel="search" title="Search" href="../../../search.html"/>
+ <link rel="top" title="Apache Beam documentation" href="../../../index.html"/>
+ <link rel="up" title="Module code" href="../../index.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Apache Beam
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.coders.html">apache_beam.coders package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.internal.html">apache_beam.internal package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.io.html">apache_beam.io package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.metrics.html">apache_beam.metrics package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.options.html">apache_beam.options package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.portability.html">apache_beam.portability package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.runners.html">apache_beam.runners package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.testing.html">apache_beam.testing package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.transforms.html">apache_beam.transforms package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.typehints.html">apache_beam.typehints package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.utils.html">apache_beam.utils package</a></li>
+</ul>
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.error.html">apache_beam.error module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.version.html">apache_beam.version module</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Apache Beam</a>
+
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+
+ <ul class="wy-breadcrumbs">
+
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li>apache_beam.io.avroio</li>
+
+
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+
+ </ul>
+
+
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for apache_beam.io.avroio</h1><div class="highlight"><pre>
+<span></span><span class="c1">#</span>
+<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
+<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
+<span class="c1"># this work for additional information regarding copyright ownership.</span>
+<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
+<span class="c1"># (the "License"); you may not use this file except in compliance with</span>
+<span class="c1"># the License. You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="c1">#</span>
+<span class="sd">"""``PTransforms`` for reading from and writing to Avro files.</span>
+
+<span class="sd">Provides two read ``PTransform``s, ``ReadFromAvro`` and ``ReadAllFromAvro``,</span>
+<span class="sd">that produce a ``PCollection`` of records.</span>
+<span class="sd">Each record of this ``PCollection`` will contain a single record read from</span>
+<span class="sd">an Avro file. Records that are of simple types will be mapped into</span>
+<span class="sd">corresponding Python types. Records that are of Avro type 'RECORD' will be</span>
+<span class="sd">mapped to Python dictionaries that comply with the schema contained in the</span>
+<span class="sd">Avro file that contains those records. In this case, keys of each dictionary</span>
+<span class="sd">will contain the corresponding field names and will be of type ``string``</span>
+<span class="sd">while the values of the dictionary will be of the type defined in the</span>
+<span class="sd">corresponding Avro schema.</span>
+
+<span class="sd">For example, if the schema of the Avro file is the following.</span>
+<span class="sd">{"namespace": "example.avro","type": "record","name": "User","fields":</span>
+<span class="sd">[{"name": "name", "type": "string"},</span>
+<span class="sd">{"name": "favorite_number", "type": ["int", "null"]},</span>
+<span class="sd">{"name": "favorite_color", "type": ["string", "null"]}]}</span>
+
+<span class="sd">Then records generated by read transforms will be dictionaries of the</span>
+<span class="sd">following form.</span>
+<span class="sd">{u'name': u'Alyssa', u'favorite_number': 256, u'favorite_color': None}</span>
+
+<span class="sd">Additionally, this module provides a write ``PTransform`` ``WriteToAvro``</span>
+<span class="sd">that can be used to write a given ``PCollection`` of Python objects to an</span>
+<span class="sd">Avro file.</span>
+<span class="sd">"""</span>
+
+<span class="kn">import</span> <span class="nn">cStringIO</span>
+<span class="kn">import</span> <span class="nn">os</span>
+<span class="kn">import</span> <span class="nn">zlib</span>
+<span class="kn">from</span> <span class="nn">functools</span> <span class="k">import</span> <span class="n">partial</span>
+
+<span class="kn">import</span> <span class="nn">avro</span>
+<span class="kn">from</span> <span class="nn">avro</span> <span class="k">import</span> <span class="n">io</span> <span class="k">as</span> <span class="n">avroio</span>
+<span class="kn">from</span> <span class="nn">avro</span> <span class="k">import</span> <span class="n">datafile</span>
+<span class="kn">from</span> <span class="nn">avro</span> <span class="k">import</span> <span class="n">schema</span>
+
+<span class="kn">import</span> <span class="nn">apache_beam</span> <span class="k">as</span> <span class="nn">beam</span>
+<span class="kn">from</span> <span class="nn">apache_beam.io</span> <span class="k">import</span> <span class="n">filebasedsink</span>
+<span class="kn">from</span> <span class="nn">apache_beam.io</span> <span class="k">import</span> <span class="n">filebasedsource</span>
+<span class="kn">from</span> <span class="nn">apache_beam.io</span> <span class="k">import</span> <span class="n">iobase</span>
+<span class="kn">from</span> <span class="nn">apache_beam.io.filesystem</span> <span class="k">import</span> <span class="n">CompressionTypes</span>
+<span class="kn">from</span> <span class="nn">apache_beam.io.iobase</span> <span class="k">import</span> <span class="n">Read</span>
+<span class="kn">from</span> <span class="nn">apache_beam.transforms</span> <span class="k">import</span> <span class="n">PTransform</span>
+
+<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'ReadFromAvro'</span><span class="p">,</span> <span class="s1">'ReadAllFromAvro'</span><span class="p">,</span> <span class="s1">'WriteToAvro'</span><span class="p">]</span>
+
+
+<div class="viewcode-block" id="ReadFromAvro"><a class="viewcode-back" href="../../../apache_beam.io.avroio.html#apache_beam.io.avroio.ReadFromAvro">[docs]</a><span class="k">class</span> <span class="nc">ReadFromAvro</span><span class="p">(</span><span class="n">PTransform</span><span class="p">):</span>
+ <span class="sd">"""A :class:`~apache_beam.transforms.ptransform.PTransform` for reading avro</span>
+<span class="sd"> files."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file_pattern</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">min_bundle_size</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">validate</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
+ <span class="sd">"""Initializes :class:`ReadFromAvro`.</span>
+
+<span class="sd"> Uses source :class:`~apache_beam.io._AvroSource` to read a set of Avro</span>
+<span class="sd"> files defined by a given file pattern.</span>
+
+<span class="sd"> If ``/mypath/myavrofiles*`` is a file-pattern that points to a set of Avro</span>
+<span class="sd"> files, a :class:`~apache_beam.pvalue.PCollection` for the records in</span>
+<span class="sd"> these Avro files can be created in the following manner.</span>
+
+<span class="sd"> .. testcode::</span>
+
+<span class="sd"> with beam.Pipeline() as p:</span>
+<span class="sd"> records = p | 'Read' >> beam.io.ReadFromAvro('/mypath/myavrofiles*')</span>
+
+<span class="sd"> .. NOTE: We're not actually interested in this error; but if we get here,</span>
+<span class="sd"> it means that the way of calling this transform hasn't changed.</span>
+
+<span class="sd"> .. testoutput::</span>
+<span class="sd"> :hide:</span>
+
+<span class="sd"> Traceback (most recent call last):</span>
+<span class="sd"> ...</span>
+<span class="sd"> IOError: No files found based on the file pattern</span>
+
+<span class="sd"> Each record of this :class:`~apache_beam.pvalue.PCollection` will contain</span>
+<span class="sd"> a single record read from a source. Records that are of simple types will be</span>
+<span class="sd"> mapped into corresponding Python types. Records that are of Avro type</span>
+<span class="sd"> ``RECORD`` will be mapped to Python dictionaries that comply with the schema</span>
+<span class="sd"> contained in the Avro file that contains those records. In this case, keys</span>
+<span class="sd"> of each dictionary will contain the corresponding field names and will be of</span>
+<span class="sd"> type :class:`str` while the values of the dictionary will be of the type</span>
+<span class="sd"> defined in the corresponding Avro schema.</span>
+
+<span class="sd"> For example, if the schema of the Avro file is the following. ::</span>
+
+<span class="sd"> {</span>
+<span class="sd"> "namespace": "example.avro",</span>
+<span class="sd"> "type": "record",</span>
+<span class="sd"> "name": "User",</span>
+<span class="sd"> "fields": [</span>
+
+<span class="sd"> {"name": "name",</span>
+<span class="sd"> "type": "string"},</span>
+
+<span class="sd"> {"name": "favorite_number",</span>
+<span class="sd"> "type": ["int", "null"]},</span>
+
+<span class="sd"> {"name": "favorite_color",</span>
+<span class="sd"> "type": ["string", "null"]}</span>
+
+<span class="sd"> ]</span>
+<span class="sd"> }</span>
+
+<span class="sd"> Then records generated by :class:`~apache_beam.io._AvroSource` will be</span>
+<span class="sd"> dictionaries of the following form. ::</span>
+
+<span class="sd"> {u'name': u'Alyssa', u'favorite_number': 256, u'favorite_color': None}</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> file_pattern (str): the file glob to read</span>
+<span class="sd"> min_bundle_size (int): the minimum size in bytes, to be considered when</span>
+<span class="sd"> splitting the input into bundles.</span>
+<span class="sd"> validate (bool): flag to verify that the files exist during the pipeline</span>
+<span class="sd"> creation time.</span>
+<span class="sd"> """</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">ReadFromAvro</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_source</span> <span class="o">=</span> <span class="n">_AvroSource</span><span class="p">(</span><span class="n">file_pattern</span><span class="p">,</span> <span class="n">min_bundle_size</span><span class="p">,</span> <span class="n">validate</span><span class="o">=</span><span class="n">validate</span><span class="p">)</span>
+
+<div class="viewcode-block" id="ReadFromAvro.expand"><a class="viewcode-back" href="../../../apache_beam.io.avroio.html#apache_beam.io.avroio.ReadFromAvro.expand">[docs]</a> <span class="k">def</span> <span class="nf">expand</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pvalue</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">pvalue</span><span class="o">.</span><span class="n">pipeline</span> <span class="o">|</span> <span class="n">Read</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_source</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="ReadFromAvro.display_data"><a class="viewcode-back" href="../../../apache_beam.io.avroio.html#apache_beam.io.avroio.ReadFromAvro.display_data">[docs]</a> <span class="k">def</span> <span class="nf">display_data</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">{</span><span class="s1">'source_dd'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">_source</span><span class="p">}</span></div></div>
+
+
+<div class="viewcode-block" id="ReadAllFromAvro"><a class="viewcode-back" href="../../../apache_beam.io.avroio.html#apache_beam.io.avroio.ReadAllFromAvro">[docs]</a><span class="k">class</span> <span class="nc">ReadAllFromAvro</span><span class="p">(</span><span class="n">PTransform</span><span class="p">):</span>
+ <span class="sd">"""A ``PTransform`` for reading a ``PCollection`` of Avro files.</span>
+
+<span class="sd"> Uses source '_AvroSource' to read a ``PCollection`` of Avro files or</span>
+<span class="sd"> file patterns and produce a ``PCollection`` of Avro records.</span>
+<span class="sd"> """</span>
+
+ <span class="n">DEFAULT_DESIRED_BUNDLE_SIZE</span> <span class="o">=</span> <span class="mi">64</span> <span class="o">*</span> <span class="mi">1024</span> <span class="o">*</span> <span class="mi">1024</span> <span class="c1"># 64MB</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">min_bundle_size</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
+ <span class="n">desired_bundle_size</span><span class="o">=</span><span class="n">DEFAULT_DESIRED_BUNDLE_SIZE</span><span class="p">):</span>
+ <span class="sd">"""Initializes ``ReadAllFromAvro``.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> min_bundle_size: the minimum size in bytes, to be considered when</span>
+<span class="sd"> splitting the input into bundles.</span>
+<span class="sd"> desired_bundle_size: the desired size in bytes, to be considered when</span>
+<span class="sd"> splitting the input into bundles.</span>
+<span class="sd"> """</span>
+ <span class="n">source_from_file</span> <span class="o">=</span> <span class="n">partial</span><span class="p">(</span>
+ <span class="n">_create_avro_source</span><span class="p">,</span> <span class="n">min_bundle_size</span><span class="o">=</span><span class="n">min_bundle_size</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_all_files</span> <span class="o">=</span> <span class="n">filebasedsource</span><span class="o">.</span><span class="n">ReadAllFiles</span><span class="p">(</span>
+ <span class="kc">True</span><span class="p">,</span> <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">AUTO</span><span class="p">,</span> <span class="n">desired_bundle_size</span><span class="p">,</span> <span class="n">min_bundle_size</span><span class="p">,</span>
+ <span class="n">source_from_file</span><span class="p">)</span>
+
+<div class="viewcode-block" id="ReadAllFromAvro.expand"><a class="viewcode-back" href="../../../apache_beam.io.avroio.html#apache_beam.io.avroio.ReadAllFromAvro.expand">[docs]</a> <span class="k">def</span> <span class="nf">expand</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pvalue</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">pvalue</span> <span class="o">|</span> <span class="s1">'ReadAllFiles'</span> <span class="o">>></span> <span class="bp">self</span><span class="o">.</span><span class="n">_read_all_files</span></div></div>
+
+
+<span class="k">class</span> <span class="nc">_AvroUtils</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+
+ <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">read_meta_data_from_file</span><span class="p">(</span><span class="n">f</span><span class="p">):</span>
+ <span class="sd">"""Reads metadata from a given Avro file.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> f: Avro file to read.</span>
+<span class="sd"> Returns:</span>
+<span class="sd"> a tuple containing the codec, schema, and the sync marker of the Avro</span>
+<span class="sd"> file.</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ValueError: if the file does not start with the byte sequence defined in</span>
+<span class="sd"> the specification.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="n">f</span><span class="o">.</span><span class="n">tell</span><span class="p">()</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
+ <span class="n">f</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
+ <span class="n">decoder</span> <span class="o">=</span> <span class="n">avroio</span><span class="o">.</span><span class="n">BinaryDecoder</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
+ <span class="n">header</span> <span class="o">=</span> <span class="n">avroio</span><span class="o">.</span><span class="n">DatumReader</span><span class="p">()</span><span class="o">.</span><span class="n">read_data</span><span class="p">(</span><span class="n">datafile</span><span class="o">.</span><span class="n">META_SCHEMA</span><span class="p">,</span>
+ <span class="n">datafile</span><span class="o">.</span><span class="n">META_SCHEMA</span><span class="p">,</span> <span class="n">decoder</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">header</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'magic'</span><span class="p">)</span> <span class="o">!=</span> <span class="n">datafile</span><span class="o">.</span><span class="n">MAGIC</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Not an Avro file. File header should start with </span><span class="si">%s</span><span class="s1"> but'</span>
+ <span class="s1">'started with </span><span class="si">%s</span><span class="s1"> instead.'</span><span class="p">,</span> <span class="n">datafile</span><span class="o">.</span><span class="n">MAGIC</span><span class="p">,</span>
+ <span class="n">header</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'magic'</span><span class="p">))</span>
+
+ <span class="n">meta</span> <span class="o">=</span> <span class="n">header</span><span class="p">[</span><span class="s1">'meta'</span><span class="p">]</span>
+
+ <span class="k">if</span> <span class="n">datafile</span><span class="o">.</span><span class="n">CODEC_KEY</span> <span class="ow">in</span> <span class="n">meta</span><span class="p">:</span>
+ <span class="n">codec</span> <span class="o">=</span> <span class="n">meta</span><span class="p">[</span><span class="n">datafile</span><span class="o">.</span><span class="n">CODEC_KEY</span><span class="p">]</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">codec</span> <span class="o">=</span> <span class="s1">'null'</span>
+
+ <span class="n">schema_string</span> <span class="o">=</span> <span class="n">meta</span><span class="p">[</span><span class="n">datafile</span><span class="o">.</span><span class="n">SCHEMA_KEY</span><span class="p">]</span>
+ <span class="n">sync_marker</span> <span class="o">=</span> <span class="n">header</span><span class="p">[</span><span class="s1">'sync'</span><span class="p">]</span>
+
+ <span class="k">return</span> <span class="n">codec</span><span class="p">,</span> <span class="n">schema_string</span><span class="p">,</span> <span class="n">sync_marker</span>
+
+ <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">read_block_from_file</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">codec</span><span class="p">,</span> <span class="n">schema</span><span class="p">,</span> <span class="n">expected_sync_marker</span><span class="p">):</span>
+ <span class="sd">"""Reads a block from a given Avro file.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> f: Avro file to read.</span>
+<span class="sd"> codec: The codec to use for block-level decompression.</span>
+<span class="sd"> Supported codecs: 'null', 'deflate', 'snappy'</span>
+<span class="sd"> schema: Avro Schema definition represented as JSON string.</span>
+<span class="sd"> expected_sync_marker: Avro synchronization marker. If the block's sync</span>
+<span class="sd"> marker does not match this parameter, a ValueError is raised.</span>
+<span class="sd"> Returns:</span>
+<span class="sd"> A single _AvroBlock.</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ValueError: If the block cannot be read properly because the file doesn't</span>
+<span class="sd"> match the specification.</span>
+<span class="sd"> """</span>
+ <span class="n">offset</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">tell</span><span class="p">()</span>
+ <span class="n">decoder</span> <span class="o">=</span> <span class="n">avroio</span><span class="o">.</span><span class="n">BinaryDecoder</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
+ <span class="n">num_records</span> <span class="o">=</span> <span class="n">decoder</span><span class="o">.</span><span class="n">read_long</span><span class="p">()</span>
+ <span class="n">block_size</span> <span class="o">=</span> <span class="n">decoder</span><span class="o">.</span><span class="n">read_long</span><span class="p">()</span>
+ <span class="n">block_bytes</span> <span class="o">=</span> <span class="n">decoder</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="n">block_size</span><span class="p">)</span>
+ <span class="n">sync_marker</span> <span class="o">=</span> <span class="n">decoder</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">expected_sync_marker</span><span class="p">))</span>
+ <span class="k">if</span> <span class="n">sync_marker</span> <span class="o">!=</span> <span class="n">expected_sync_marker</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Unexpected sync marker (actual "</span><span class="si">%s</span><span class="s1">" vs expected "</span><span class="si">%s</span><span class="s1">"). '</span>
+ <span class="s1">'Maybe the underlying avro file is corrupted?'</span><span class="p">,</span>
+ <span class="n">sync_marker</span><span class="p">,</span> <span class="n">expected_sync_marker</span><span class="p">)</span>
+ <span class="n">size</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">tell</span><span class="p">()</span> <span class="o">-</span> <span class="n">offset</span>
+ <span class="k">return</span> <span class="n">_AvroBlock</span><span class="p">(</span><span class="n">block_bytes</span><span class="p">,</span> <span class="n">num_records</span><span class="p">,</span> <span class="n">codec</span><span class="p">,</span> <span class="n">schema</span><span class="p">,</span> <span class="n">offset</span><span class="p">,</span> <span class="n">size</span><span class="p">)</span>
+
+ <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">advance_file_past_next_sync_marker</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">sync_marker</span><span class="p">):</span>
+ <span class="n">buf_size</span> <span class="o">=</span> <span class="mi">10000</span>
+
+ <span class="n">data</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="n">buf_size</span><span class="p">)</span>
+ <span class="k">while</span> <span class="n">data</span><span class="p">:</span>
+ <span class="n">pos</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="n">sync_marker</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">pos</span> <span class="o">>=</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="c1"># Adjusting the current position to the ending position of the sync</span>
+ <span class="c1"># marker.</span>
+ <span class="n">backtrack</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">)</span> <span class="o">-</span> <span class="n">pos</span> <span class="o">-</span> <span class="nb">len</span><span class="p">(</span><span class="n">sync_marker</span><span class="p">)</span>
+ <span class="n">f</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span> <span class="o">*</span> <span class="n">backtrack</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">SEEK_CUR</span><span class="p">)</span>
+ <span class="k">return</span> <span class="kc">True</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">f</span><span class="o">.</span><span class="n">tell</span><span class="p">()</span> <span class="o">>=</span> <span class="nb">len</span><span class="p">(</span><span class="n">sync_marker</span><span class="p">):</span>
+ <span class="c1"># Backtracking in case we partially read the sync marker during the</span>
+ <span class="c1"># previous read. We only have to backtrack if there are at least</span>
+ <span class="c1"># len(sync_marker) bytes before current position. We only have to</span>
+ <span class="c1"># backtrack (len(sync_marker) - 1) bytes.</span>
+ <span class="n">f</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span> <span class="o">*</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">sync_marker</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">),</span> <span class="n">os</span><span class="o">.</span><span class="n">SEEK_CUR</span><span class="p">)</span>
+ <span class="n">data</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="n">buf_size</span><span class="p">)</span>
+
+
+<span class="k">def</span> <span class="nf">_create_avro_source</span><span class="p">(</span><span class="n">file_pattern</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">min_bundle_size</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">_AvroSource</span><span class="p">(</span>
+ <span class="n">file_pattern</span><span class="o">=</span><span class="n">file_pattern</span><span class="p">,</span> <span class="n">min_bundle_size</span><span class="o">=</span><span class="n">min_bundle_size</span><span class="p">,</span>
+ <span class="n">validate</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
+
+
+<span class="k">class</span> <span class="nc">_AvroBlock</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">"""Represents a block of an Avro file."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">block_bytes</span><span class="p">,</span> <span class="n">num_records</span><span class="p">,</span> <span class="n">codec</span><span class="p">,</span> <span class="n">schema_string</span><span class="p">,</span>
+ <span class="n">offset</span><span class="p">,</span> <span class="n">size</span><span class="p">):</span>
+ <span class="c1"># Decompress data early on (if needed) and thus decrease the number of</span>
+ <span class="c1"># parallel copies of the data in memory at any given time during</span>
+ <span class="c1"># block iteration.</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_decompressed_block_bytes</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_decompress_bytes</span><span class="p">(</span><span class="n">block_bytes</span><span class="p">,</span> <span class="n">codec</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_num_records</span> <span class="o">=</span> <span class="n">num_records</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_schema</span> <span class="o">=</span> <span class="n">schema</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">schema_string</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_offset</span> <span class="o">=</span> <span class="n">offset</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_size</span> <span class="o">=</span> <span class="n">size</span>
+
+ <span class="k">def</span> <span class="nf">size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_size</span>
+
+ <span class="k">def</span> <span class="nf">offset</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_offset</span>
+
+ <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">_decompress_bytes</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">codec</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">codec</span> <span class="o">==</span> <span class="s1">'null'</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">data</span>
+ <span class="k">elif</span> <span class="n">codec</span> <span class="o">==</span> <span class="s1">'deflate'</span><span class="p">:</span>
+ <span class="c1"># zlib.MAX_WBITS is the window size. '-' sign indicates that this is</span>
+ <span class="c1"># raw data (without headers). See zlib and Avro documentation for more</span>
+ <span class="c1"># details.</span>
+ <span class="k">return</span> <span class="n">zlib</span><span class="o">.</span><span class="n">decompress</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="o">-</span><span class="n">zlib</span><span class="o">.</span><span class="n">MAX_WBITS</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="n">codec</span> <span class="o">==</span> <span class="s1">'snappy'</span><span class="p">:</span>
+ <span class="c1"># Snappy is an optional avro codec.</span>
+ <span class="c1"># See Snappy and Avro documentation for more details.</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="kn">import</span> <span class="nn">snappy</span>
+ <span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Snappy does not seem to be installed.'</span><span class="p">)</span>
+
+ <span class="c1"># Compressed data includes a 4-byte CRC32 checksum which we verify.</span>
+ <span class="c1"># We take care to avoid extra copies of data while slicing large objects</span>
+ <span class="c1"># by use of a buffer.</span>
+ <span class="n">result</span> <span class="o">=</span> <span class="n">snappy</span><span class="o">.</span><span class="n">decompress</span><span class="p">(</span><span class="n">buffer</span><span class="p">(</span><span class="n">data</span><span class="p">)[:</span><span class="o">-</span><span class="mi">4</span><span class="p">])</span>
+ <span class="n">avroio</span><span class="o">.</span><span class="n">BinaryDecoder</span><span class="p">(</span><span class="n">cStringIO</span><span class="o">.</span><span class="n">StringIO</span><span class="p">(</span><span class="n">data</span><span class="p">[</span><span class="o">-</span><span class="mi">4</span><span class="p">:]))</span><span class="o">.</span><span class="n">check_crc32</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">result</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Unknown codec: </span><span class="si">%r</span><span class="s1">'</span><span class="p">,</span> <span class="n">codec</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">num_records</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_num_records</span>
+
+ <span class="k">def</span> <span class="nf">records</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">decoder</span> <span class="o">=</span> <span class="n">avroio</span><span class="o">.</span><span class="n">BinaryDecoder</span><span class="p">(</span>
+ <span class="n">cStringIO</span><span class="o">.</span><span class="n">StringIO</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_decompressed_block_bytes</span><span class="p">))</span>
+ <span class="n">reader</span> <span class="o">=</span> <span class="n">avroio</span><span class="o">.</span><span class="n">DatumReader</span><span class="p">(</span>
+ <span class="n">writers_schema</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_schema</span><span class="p">,</span> <span class="n">readers_schema</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_schema</span><span class="p">)</span>
+
+ <span class="n">current_record</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">while</span> <span class="n">current_record</span> <span class="o"><</span> <span class="bp">self</span><span class="o">.</span><span class="n">_num_records</span><span class="p">:</span>
+ <span class="k">yield</span> <span class="n">reader</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="n">decoder</span><span class="p">)</span>
+ <span class="n">current_record</span> <span class="o">+=</span> <span class="mi">1</span>
+
+
+<span class="k">class</span> <span class="nc">_AvroSource</span><span class="p">(</span><span class="n">filebasedsource</span><span class="o">.</span><span class="n">FileBasedSource</span><span class="p">):</span>
+ <span class="sd">"""A source for reading Avro files.</span>
+
+<span class="sd"> ``_AvroSource`` is implemented using the file-based source framework available</span>
+<span class="sd"> in module 'filebasedsource'. Hence please refer to module 'filebasedsource'</span>
+<span class="sd"> to fully understand how this source implements operations common to all</span>
+<span class="sd"> file-based sources such as file-pattern expansion and splitting into bundles</span>
+<span class="sd"> for parallel processing.</span>
+<span class="sd"> """</span>
+
+ <span class="k">def</span> <span class="nf">read_records</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file_name</span><span class="p">,</span> <span class="n">range_tracker</span><span class="p">):</span>
+ <span class="n">next_block_start</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span>
+
+ <span class="k">def</span> <span class="nf">split_points_unclaimed</span><span class="p">(</span><span class="n">stop_position</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">next_block_start</span> <span class="o">>=</span> <span class="n">stop_position</span><span class="p">:</span>
+ <span class="c1"># Next block starts at or after the suggested stop position. Hence</span>
+ <span class="c1"># there will not be split points to be claimed for the range ending at</span>
+ <span class="c1"># suggested stop position.</span>
+ <span class="k">return</span> <span class="mi">0</span>
+
+ <span class="k">return</span> <span class="n">iobase</span><span class="o">.</span><span class="n">RangeTracker</span><span class="o">.</span><span class="n">SPLIT_POINTS_UNKNOWN</span>
+
+ <span class="n">range_tracker</span><span class="o">.</span><span class="n">set_split_points_unclaimed_callback</span><span class="p">(</span><span class="n">split_points_unclaimed</span><span class="p">)</span>
+
+ <span class="n">start_offset</span> <span class="o">=</span> <span class="n">range_tracker</span><span class="o">.</span><span class="n">start_position</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">start_offset</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">start_offset</span> <span class="o">=</span> <span class="mi">0</span>
+
+ <span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">open_file</span><span class="p">(</span><span class="n">file_name</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
+ <span class="n">codec</span><span class="p">,</span> <span class="n">schema_string</span><span class="p">,</span> <span class="n">sync_marker</span> <span class="o">=</span> <span class="n">_AvroUtils</span><span class="o">.</span><span class="n">read_meta_data_from_file</span><span class="p">(</span>
+ <span class="n">f</span><span class="p">)</span>
+
+ <span class="c1"># We have to start at current position if previous bundle ended at the</span>
+ <span class="c1"># end of a sync marker.</span>
+ <span class="n">start_offset</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">start_offset</span> <span class="o">-</span> <span class="nb">len</span><span class="p">(</span><span class="n">sync_marker</span><span class="p">))</span>
+ <span class="n">f</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="n">start_offset</span><span class="p">)</span>
+ <span class="n">_AvroUtils</span><span class="o">.</span><span class="n">advance_file_past_next_sync_marker</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">sync_marker</span><span class="p">)</span>
+
+ <span class="k">while</span> <span class="n">range_tracker</span><span class="o">.</span><span class="n">try_claim</span><span class="p">(</span><span class="n">f</span><span class="o">.</span><span class="n">tell</span><span class="p">()):</span>
+ <span class="n">block</span> <span class="o">=</span> <span class="n">_AvroUtils</span><span class="o">.</span><span class="n">read_block_from_file</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">codec</span><span class="p">,</span> <span class="n">schema_string</span><span class="p">,</span>
+ <span class="n">sync_marker</span><span class="p">)</span>
+ <span class="n">next_block_start</span> <span class="o">=</span> <span class="n">block</span><span class="o">.</span><span class="n">offset</span><span class="p">()</span> <span class="o">+</span> <span class="n">block</span><span class="o">.</span><span class="n">size</span><span class="p">()</span>
+ <span class="k">for</span> <span class="n">record</span> <span class="ow">in</span> <span class="n">block</span><span class="o">.</span><span class="n">records</span><span class="p">():</span>
+ <span class="k">yield</span> <span class="n">record</span>
+
+
+<div class="viewcode-block" id="WriteToAvro"><a class="viewcode-back" href="../../../apache_beam.io.avroio.html#apache_beam.io.avroio.WriteToAvro">[docs]</a><span class="k">class</span> <span class="nc">WriteToAvro</span><span class="p">(</span><span class="n">beam</span><span class="o">.</span><span class="n">transforms</span><span class="o">.</span><span class="n">PTransform</span><span class="p">):</span>
+ <span class="sd">"""A ``PTransform`` for writing avro files."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
+ <span class="n">file_path_prefix</span><span class="p">,</span>
+ <span class="n">schema</span><span class="p">,</span>
+ <span class="n">codec</span><span class="o">=</span><span class="s1">'deflate'</span><span class="p">,</span>
+ <span class="n">file_name_suffix</span><span class="o">=</span><span class="s1">''</span><span class="p">,</span>
+ <span class="n">num_shards</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
+ <span class="n">shard_name_template</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
+ <span class="n">mime_type</span><span class="o">=</span><span class="s1">'application/x-avro'</span><span class="p">):</span>
+ <span class="sd">"""Initialize a WriteToAvro transform.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> file_path_prefix: The file path to write to. The files written will begin</span>
+<span class="sd"> with this prefix, followed by a shard identifier (see num_shards), and</span>
+<span class="sd"> end in a common extension, if given by file_name_suffix. In most cases,</span>
+<span class="sd"> only this argument is specified and num_shards, shard_name_template, and</span>
+<span class="sd"> file_name_suffix use default values.</span>
+<span class="sd"> schema: The schema to use, as returned by avro.schema.parse</span>
+<span class="sd"> codec: The codec to use for block-level compression. Any string supported</span>
+<span class="sd"> by the Avro specification is accepted (for example 'null').</span>
+<span class="sd"> file_name_suffix: Suffix for the files written.</span>
+<span class="sd"> num_shards: The number of files (shards) used for output. If not set, the</span>
+<span class="sd"> service will decide on the optimal number of shards.</span>
+<span class="sd"> Constraining the number of shards is likely to reduce</span>
+<span class="sd"> the performance of a pipeline. Setting this value is not recommended</span>
+<span class="sd"> unless you require a specific number of output files.</span>
+<span class="sd"> shard_name_template: A template string containing placeholders for</span>
+<span class="sd"> the shard number and shard count. When constructing a filename for a</span>
+<span class="sd"> particular shard number, the upper-case letters 'S' and 'N' are</span>
+<span class="sd"> replaced with the 0-padded shard number and shard count respectively.</span>
+<span class="sd"> This argument can be '' in which case it behaves as if num_shards was</span>
+<span class="sd"> set to 1 and only one file will be generated. The default pattern used</span>
+<span class="sd"> is '-SSSSS-of-NNNNN' if None is passed as the shard_name_template.</span>
+<span class="sd"> mime_type: The MIME type to use for the produced files, if the filesystem</span>
+<span class="sd"> supports specifying MIME types.</span>
+
+<span class="sd"> Returns:</span>
+<span class="sd"> A WriteToAvro transform usable for writing.</span>
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_sink</span> <span class="o">=</span> <span class="n">_AvroSink</span><span class="p">(</span><span class="n">file_path_prefix</span><span class="p">,</span> <span class="n">schema</span><span class="p">,</span> <span class="n">codec</span><span class="p">,</span> <span class="n">file_name_suffix</span><span class="p">,</span>
+ <span class="n">num_shards</span><span class="p">,</span> <span class="n">shard_name_template</span><span class="p">,</span> <span class="n">mime_type</span><span class="p">)</span>
+
+<div class="viewcode-block" id="WriteToAvro.expand"><a class="viewcode-back" href="../../../apache_beam.io.avroio.html#apache_beam.io.avroio.WriteToAvro.expand">[docs]</a> <span class="k">def</span> <span class="nf">expand</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pcoll</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">pcoll</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">iobase</span><span class="o">.</span><span class="n">Write</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_sink</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="WriteToAvro.display_data"><a class="viewcode-back" href="../../../apache_beam.io.avroio.html#apache_beam.io.avroio.WriteToAvro.display_data">[docs]</a> <span class="k">def</span> <span class="nf">display_data</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">{</span><span class="s1">'sink_dd'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sink</span><span class="p">}</span></div></div>
+
+
+<span class="k">class</span> <span class="nc">_AvroSink</span><span class="p">(</span><span class="n">filebasedsink</span><span class="o">.</span><span class="n">FileBasedSink</span><span class="p">):</span>
+ <span class="sd">"""A sink to avro files."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
+ <span class="n">file_path_prefix</span><span class="p">,</span>
+ <span class="n">schema</span><span class="p">,</span>
+ <span class="n">codec</span><span class="p">,</span>
+ <span class="n">file_name_suffix</span><span class="p">,</span>
+ <span class="n">num_shards</span><span class="p">,</span>
+ <span class="n">shard_name_template</span><span class="p">,</span>
+ <span class="n">mime_type</span><span class="p">):</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">_AvroSink</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span>
+ <span class="n">file_path_prefix</span><span class="p">,</span>
+ <span class="n">file_name_suffix</span><span class="o">=</span><span class="n">file_name_suffix</span><span class="p">,</span>
+ <span class="n">num_shards</span><span class="o">=</span><span class="n">num_shards</span><span class="p">,</span>
+ <span class="n">shard_name_template</span><span class="o">=</span><span class="n">shard_name_template</span><span class="p">,</span>
+ <span class="n">coder</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
+ <span class="n">mime_type</span><span class="o">=</span><span class="n">mime_type</span><span class="p">,</span>
+ <span class="c1"># Compression happens at the block level using the supplied codec, and</span>
+ <span class="c1"># not at the file level.</span>
+ <span class="n">compression_type</span><span class="o">=</span><span class="n">CompressionTypes</span><span class="o">.</span><span class="n">UNCOMPRESSED</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_schema</span> <span class="o">=</span> <span class="n">schema</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_codec</span> <span class="o">=</span> <span class="n">codec</span>
+
+ <span class="k">def</span> <span class="nf">open</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">temp_path</span><span class="p">):</span>
+ <span class="n">file_handle</span> <span class="o">=</span> <span class="nb">super</span><span class="p">(</span><span class="n">_AvroSink</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="n">temp_path</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">avro</span><span class="o">.</span><span class="n">datafile</span><span class="o">.</span><span class="n">DataFileWriter</span><span class="p">(</span>
+ <span class="n">file_handle</span><span class="p">,</span> <span class="n">avro</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">DatumWriter</span><span class="p">(),</span> <span class="bp">self</span><span class="o">.</span><span class="n">_schema</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_codec</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">write_record</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">writer</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
+ <span class="n">writer</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">display_data</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">res</span> <span class="o">=</span> <span class="nb">super</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">display_data</span><span class="p">()</span>
+ <span class="n">res</span><span class="p">[</span><span class="s1">'codec'</span><span class="p">]</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_codec</span><span class="p">)</span>
+ <span class="n">res</span><span class="p">[</span><span class="s1">'schema'</span><span class="p">]</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_schema</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">res</span>
+</pre></div>
+
+ </div>
+ <div class="articleComments">
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright .
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true,
+ SOURCELINK_SUFFIX: '.txt'
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/concat_source.html b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/concat_source.html
new file mode 100644
index 0000000..f1c29ce
--- /dev/null
+++ b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/concat_source.html
@@ -0,0 +1,503 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>apache_beam.io.concat_source — Apache Beam documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="index" title="Index"
+ href="../../../genindex.html"/>
+ <link rel="search" title="Search" href="../../../search.html"/>
+ <link rel="top" title="Apache Beam documentation" href="../../../index.html"/>
+ <link rel="up" title="Module code" href="../../index.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Apache Beam
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.coders.html">apache_beam.coders package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.internal.html">apache_beam.internal package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.io.html">apache_beam.io package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.metrics.html">apache_beam.metrics package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.options.html">apache_beam.options package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.portability.html">apache_beam.portability package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.runners.html">apache_beam.runners package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.testing.html">apache_beam.testing package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.transforms.html">apache_beam.transforms package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.typehints.html">apache_beam.typehints package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.utils.html">apache_beam.utils package</a></li>
+</ul>
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.error.html">apache_beam.error module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.version.html">apache_beam.version module</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Apache Beam</a>
+
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+
+ <ul class="wy-breadcrumbs">
+
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li>apache_beam.io.concat_source</li>
+
+
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+
+ </ul>
+
+
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for apache_beam.io.concat_source</h1><div class="highlight"><pre>
+<span></span><span class="c1">#</span>
+<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
+<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
+<span class="c1"># this work for additional information regarding copyright ownership.</span>
+<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
+<span class="c1"># (the "License"); you may not use this file except in compliance with</span>
+<span class="c1"># the License. You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="c1">#</span>
+
+<span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd">Concat Source, which reads the union of several other sources.</span>
+<span class="sd">"""</span>
+
+<span class="kn">import</span> <span class="nn">bisect</span>
+<span class="kn">import</span> <span class="nn">threading</span>
+
+<span class="kn">from</span> <span class="nn">apache_beam.io</span> <span class="k">import</span> <span class="n">iobase</span>
+
+
+<div class="viewcode-block" id="ConcatSource"><a class="viewcode-back" href="../../../apache_beam.io.concat_source.html#apache_beam.io.concat_source.ConcatSource">[docs]</a><span class="k">class</span> <span class="nc">ConcatSource</span><span class="p">(</span><span class="n">iobase</span><span class="o">.</span><span class="n">BoundedSource</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> A ``BoundedSource`` that can group a set of ``BoundedSources``.</span>
+
+<span class="sd"> Primarily for internal use, use the ``apache_beam.Flatten`` transform</span>
+<span class="sd"> to create the union of several reads.</span>
+<span class="sd"> """</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sources</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_source_bundles</span> <span class="o">=</span> <span class="p">[</span><span class="n">source</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">source</span><span class="p">,</span> <span class="n">iobase</span><span class="o">.</span><span class="n">SourceBundle</span><span class="p">)</span>
+ <span class="k">else</span> <span class="n">iobase</span><span class="o">.</span><span class="n">SourceBundle</span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="n">source</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">source</span> <span class="ow">in</span> <span class="n">sources</span><span class="p">]</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">sources</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">[</span><span class="n">s</span><span class="o">.</span><span class="n">source</span> <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_source_bundles</span><span class="p">]</span>
+
+<div class="viewcode-block" id="ConcatSource.estimate_size"><a class="viewcode-back" href="../../../apache_beam.io.concat_source.html#apache_beam.io.concat_source.ConcatSource.estimate_size">[docs]</a> <span class="k">def</span> <span class="nf">estimate_size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">sum</span><span class="p">(</span><span class="n">s</span><span class="o">.</span><span class="n">source</span><span class="o">.</span><span class="n">estimate_size</span><span class="p">()</span> <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_source_bundles</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="ConcatSource.split"><a class="viewcode-back" href="../../../apache_beam.io.concat_source.html#apache_beam.io.concat_source.ConcatSource.split">[docs]</a> <span class="k">def</span> <span class="nf">split</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span> <span class="n">desired_bundle_size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">start_position</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">stop_position</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">start_position</span> <span class="ow">or</span> <span class="n">stop_position</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
+ <span class="s1">'Multi-level initial splitting is not supported. Expected start and '</span>
+ <span class="s1">'stop positions to be None. Received </span><span class="si">%r</span><span class="s1"> and </span><span class="si">%r</span><span class="s1"> respectively.'</span> <span class="o">%</span>
+ <span class="p">(</span><span class="n">start_position</span><span class="p">,</span> <span class="n">stop_position</span><span class="p">))</span>
+
+ <span class="k">for</span> <span class="n">source</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_source_bundles</span><span class="p">:</span>
+ <span class="c1"># We assume all sub-sources to produce bundles that specify weight using</span>
+ <span class="c1"># the same unit. For example, all sub-sources may specify the size in</span>
+ <span class="c1"># bytes as their weight.</span>
+ <span class="k">for</span> <span class="n">bundle</span> <span class="ow">in</span> <span class="n">source</span><span class="o">.</span><span class="n">source</span><span class="o">.</span><span class="n">split</span><span class="p">(</span>
+ <span class="n">desired_bundle_size</span><span class="p">,</span> <span class="n">source</span><span class="o">.</span><span class="n">start_position</span><span class="p">,</span> <span class="n">source</span><span class="o">.</span><span class="n">stop_position</span><span class="p">):</span>
+ <span class="k">yield</span> <span class="n">bundle</span></div>
+
+<div class="viewcode-block" id="ConcatSource.get_range_tracker"><a class="viewcode-back" href="../../../apache_beam.io.concat_source.html#apache_beam.io.concat_source.ConcatSource.get_range_tracker">[docs]</a> <span class="k">def</span> <span class="nf">get_range_tracker</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">start_position</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">stop_p [...]
+ <span class="k">if</span> <span class="n">start_position</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">start_position</span> <span class="o">=</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">stop_position</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">stop_position</span> <span class="o">=</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_source_bundles</span><span class="p">),</span> <span class="kc">None</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">ConcatRangeTracker</span><span class="p">(</span>
+ <span class="n">start_position</span><span class="p">,</span> <span class="n">stop_position</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_source_bundles</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="ConcatSource.read"><a class="viewcode-back" href="../../../apache_beam.io.concat_source.html#apache_beam.io.concat_source.ConcatSource.read">[docs]</a> <span class="k">def</span> <span class="nf">read</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">range_tracker</span><span class="p">):</span>
+ <span class="n">start_source</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">range_tracker</span><span class="o">.</span><span class="n">start_position</span><span class="p">()</span>
+ <span class="n">stop_source</span><span class="p">,</span> <span class="n">stop_pos</span> <span class="o">=</span> <span class="n">range_tracker</span><span class="o">.</span><span class="n">stop_position</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">stop_pos</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">stop_source</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="k">for</span> <span class="n">source_ix</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">start_source</span><span class="p">,</span> <span class="n">stop_source</span><span class="p">):</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">range_tracker</span><span class="o">.</span><span class="n">try_claim</span><span class="p">((</span><span class="n">source_ix</span><span class="p">,</span> <span class="kc">None</span><span class="p">)):</span>
+ <span class="k">break</span>
+ <span class="k">for</span> <span class="n">record</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_source_bundles</span><span class="p">[</span><span class="n">source_ix</span><span class="p">]</span><span class="o">.</span><span class="n">source</span><span class="o">.</span><span class="n">read</span><span class="p">(</span>
+ <span class="n">range_tracker</span><span class="o">.</span><span class="n">sub_range_tracker</span><span class="p">(</span><span class="n">source_ix</span><span class="p">)):</span>
+ <span class="k">yield</span> <span class="n">record</span></div>
+
+<div class="viewcode-block" id="ConcatSource.default_output_coder"><a class="viewcode-back" href="../../../apache_beam.io.concat_source.html#apache_beam.io.concat_source.ConcatSource.default_output_coder">[docs]</a> <span class="k">def</span> <span class="nf">default_output_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_source_bundles</span><span class="p">:</span>
+ <span class="c1"># Getting coder from the first sub-sources. This assumes all sub-sources</span>
+ <span class="c1"># to produce the same coder.</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_source_bundles</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">source</span><span class="o">.</span><span class="n">default_output_coder</span><span class="p">()</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="nb">super</span><span class="p">(</span><span class="n">ConcatSource</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">default_output_coder</span><span class="p">()</span></div></div>
+
+
+<div class="viewcode-block" id="ConcatRangeTracker"><a class="viewcode-back" href="../../../apache_beam.io.concat_source.html#apache_beam.io.concat_source.ConcatRangeTracker">[docs]</a><span class="k">class</span> <span class="nc">ConcatRangeTracker</span><span class="p">(</span><span class="n">iobase</span><span class="o">.</span><span class="n">RangeTracker</span><span class="p">):</span>
+ <span class="sd">"""For internal use only; no backwards-compatibility guarantees.</span>
+
+<span class="sd"> Range tracker for ConcatSource"""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">start</span><span class="p">,</span> <span class="n">end</span><span class="p">,</span> <span class="n">source_bundles</span><span class="p">):</span>
+ <span class="sd">"""Initializes ``ConcatRangeTracker``</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> start: start position, a tuple of (source_index, source_position)</span>
+<span class="sd"> end: end position, a tuple of (source_index, source_position)</span>
+<span class="sd"> source_bundles: the list of source bundles in the ConcatSource</span>
+<span class="sd"> """</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">ConcatRangeTracker</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_start</span> <span class="o">=</span> <span class="n">start</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_end</span> <span class="o">=</span> <span class="n">end</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_source_bundles</span> <span class="o">=</span> <span class="n">source_bundles</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_lock</span> <span class="o">=</span> <span class="n">threading</span><span class="o">.</span><span class="n">RLock</span><span class="p">()</span>
+ <span class="c1"># Lazily-initialized list of RangeTrackers corresponding to each source.</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_range_trackers</span> <span class="o">=</span> <span class="p">[</span><span class="kc">None</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">source_bundles</span><span class="p">)</span>
+ <span class="c1"># The currently-being-iterated-over (and latest claimed) source.</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_claimed_source_ix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_start</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+ <span class="c1"># Now compute cumulative progress through the sources for converting</span>
+ <span class="c1"># between global fractions and fractions within specific sources.</span>
+ <span class="c1"># TODO(robertwb): Implement fraction-at-position to properly scale</span>
+ <span class="c1"># partial start and end sources.</span>
+ <span class="c1"># Note, however, that in practice splits are typically on source</span>
+ <span class="c1"># boundaries anyways.</span>
+ <span class="n">last</span> <span class="o">=</span> <span class="n">end</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">if</span> <span class="n">end</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">end</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">+</span> [...]
+ <span class="bp">self</span><span class="o">.</span><span class="n">_cumulative_weights</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">*</span> <span class="n">start</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+ <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_cumulative_weights</span><span class="p">(</span><span class="n">source_bundles</span><span class="p">[</span><span class="n">start</span><span class="p">[</span><span class="mi">0</span><span class="p">]:</span><span class="n">last</span><span class="p">])</span>
+ <span class="o">+</span> <span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">*</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">source_bundles</span><span class="p">)</span> <span class="o">-</span> <span class="n">last</span> <span class="o">-</span> <span class="n">start</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span>
+
+ <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">_compute_cumulative_weights</span><span class="p">(</span><span class="n">source_bundles</span><span class="p">):</span>
+ <span class="c1"># Two adjacent sources must differ so that they can be uniquely</span>
+ <span class="c1"># identified by a single global fraction. Let min_diff be the</span>
+ <span class="c1"># smallest allowable difference between sources.</span>
+ <span class="n">min_diff</span> <span class="o">=</span> <span class="mf">1e-5</span>
+ <span class="c1"># For the computation below, we need weights for all sources.</span>
+ <span class="c1"># Substitute average weights for those whose weights are</span>
+ <span class="c1"># unspecified (or 1.0 for everything if none are known).</span>
+ <span class="n">known</span> <span class="o">=</span> <span class="p">[</span><span class="n">s</span><span class="o">.</span><span class="n">weight</span> <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">source_bundles</span> <span class="k">if</span> <span class="n">s</span><span class="o">.</span><span class="n">weight</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">]</span>
+ <span class="n">avg</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span><span class="n">known</span><span class="p">)</span> <span class="o">/</span> <span class="nb">len</span><span class="p">(</span><span class="n">known</span><span class="p">)</span> <span class="k">if</span> <span class="n">known</span> <span class="k">else</span> <span class="mf">1.0</span>
+ <span class="n">weights</span> <span class="o">=</span> <span class="p">[</span><span class="n">s</span><span class="o">.</span><span class="n">weight</span> <span class="ow">or</span> <span class="n">avg</span> <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">source_bundles</span><span class="p">]</span>
+
+ <span class="c1"># Now compute running totals of the percent done upon reaching</span>
+ <span class="c1"># each source, with respect to the start and end positions.</span>
+ <span class="c1"># E.g. if the weights were [100, 20, 3] we would produce</span>
+ <span class="c1"># [0.0, 100/123, 120/123, 1.0]</span>
+ <span class="n">total</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="nb">sum</span><span class="p">(</span><span class="n">weights</span><span class="p">))</span>
+ <span class="n">running_total</span> <span class="o">=</span> <span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+ <span class="k">for</span> <span class="n">w</span> <span class="ow">in</span> <span class="n">weights</span><span class="p">:</span>
+ <span class="n">running_total</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
+ <span class="nb">max</span><span class="p">(</span><span class="n">min_diff</span><span class="p">,</span> <span class="nb">min</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">running_total</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">+</span> <span class="n">w</span> <span class="o">/</span> <span class="n">total</span><span class="p">)))</span>
+ <span class="n">running_total</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="mi">1</span> <span class="c1"># In case of rounding error.</span>
+ <span class="c1"># There are issues if, due to rounding error or greatly differing sizes,</span>
+ <span class="c1"># two adjacent running total weights are equal. Normalize things so</span>
+ <span class="c1"># that this never happens.</span>
+ <span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">running_total</span><span class="p">)):</span>
+ <span class="k">if</span> <span class="n">running_total</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">==</span> <span class="n">running_total</span><span class="p">[</span><span class="n">k</span> <span class="o">-</span> <span class="mi">1</span><span class="p">]:</span>
+ <span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">k</span><span class="p">):</span>
+ <span class="n">running_total</span><span class="p">[</span><span class="n">j</span><span class="p">]</span> <span class="o">*=</span> <span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">min_diff</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">running_total</span>
+
+<div class="viewcode-block" id="ConcatRangeTracker.start_position"><a class="viewcode-back" href="../../../apache_beam.io.concat_source.html#apache_beam.io.concat_source.ConcatRangeTracker.start_position">[docs]</a> <span class="k">def</span> <span class="nf">start_position</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_start</span></div>
+
+<div class="viewcode-block" id="ConcatRangeTracker.stop_position"><a class="viewcode-back" href="../../../apache_beam.io.concat_source.html#apache_beam.io.concat_source.ConcatRangeTracker.stop_position">[docs]</a> <span class="k">def</span> <span class="nf">stop_position</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_end</span></div>
+
+<div class="viewcode-block" id="ConcatRangeTracker.try_claim"><a class="viewcode-back" href="../../../apache_beam.io.concat_source.html#apache_beam.io.concat_source.ConcatRangeTracker.try_claim">[docs]</a> <span class="k">def</span> <span class="nf">try_claim</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pos</span><span class="p">):</span>
+ <span class="n">source_ix</span><span class="p">,</span> <span class="n">source_pos</span> <span class="o">=</span> <span class="n">pos</span>
+ <span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">_lock</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">source_ix</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">_end</span><span class="p">[</span><span class="mi">0</span><span class="p">]:</span>
+ <span class="k">return</span> <span class="kc">False</span>
+ <span class="k">elif</span> <span class="n">source_ix</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">_end</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_end</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span clas [...]
+ <span class="k">return</span> <span class="kc">False</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">assert</span> <span class="n">source_ix</span> <span class="o">>=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_claimed_source_ix</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_claimed_source_ix</span> <span class="o">=</span> <span class="n">source_ix</span>
+ <span class="k">if</span> <span class="n">source_pos</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="k">return</span> <span class="kc">True</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">sub_range_tracker</span><span class="p">(</span><span class="n">source_ix</span><span class="p">)</span><span class="o">.</span><span class="n">try_claim</span><span class="p">(</span><span class="n">source_pos</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="ConcatRangeTracker.try_split"><a class="viewcode-back" href="../../../apache_beam.io.concat_source.html#apache_beam.io.concat_source.ConcatRangeTracker.try_split">[docs]</a> <span class="k">def</span> <span class="nf">try_split</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pos</span><span class="p">):</span>
+ <span class="n">source_ix</span><span class="p">,</span> <span class="n">source_pos</span> <span class="o">=</span> <span class="n">pos</span>
+ <span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">_lock</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">source_ix</span> <span class="o"><</span> <span class="bp">self</span><span class="o">.</span><span class="n">_claimed_source_ix</span><span class="p">:</span>
+ <span class="c1"># Already claimed.</span>
+ <span class="k">return</span> <span class="kc">None</span>
+ <span class="k">elif</span> <span class="n">source_ix</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">_end</span><span class="p">[</span><span class="mi">0</span><span class="p">]:</span>
+ <span class="c1"># After end.</span>
+ <span class="k">return</span> <span class="kc">None</span>
+ <span class="k">elif</span> <span class="n">source_ix</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">_end</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_end</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span clas [...]
+ <span class="c1"># At/after end.</span>
+ <span class="k">return</span> <span class="kc">None</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">source_ix</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">_claimed_source_ix</span><span class="p">:</span>
+ <span class="c1"># Prefer to split on even boundary.</span>
+ <span class="n">split_pos</span> <span class="o">=</span> <span class="kc">None</span>
+ <span class="n">ratio</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cumulative_weights</span><span class="p">[</span><span class="n">source_ix</span><span class="p">]</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="c1"># Split the current subsource.</span>
+ <span class="n">split</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sub_range_tracker</span><span class="p">(</span><span class="n">source_ix</span><span class="p">)</span><span class="o">.</span><span class="n">try_split</span><span class="p">(</span>
+ <span class="n">source_pos</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">split</span><span class="p">:</span>
+ <span class="k">return</span> <span class="kc">None</span>
+ <span class="n">split_pos</span><span class="p">,</span> <span class="n">frac</span> <span class="o">=</span> <span class="n">split</span>
+ <span class="n">ratio</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">local_to_global</span><span class="p">(</span><span class="n">source_ix</span><span class="p">,</span> <span class="n">frac</span><span class="p">)</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">_end</span> <span class="o">=</span> <span class="n">source_ix</span><span class="p">,</span> <span class="n">split_pos</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_cumulative_weights</span> <span class="o">=</span> <span class="p">[</span><span class="nb">min</span><span class="p">(</span><span class="n">w</span> <span class="o">/</span> <span class="n">ratio</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">w</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cumulative_weights</span><span class="p">]</span>
+ <span class="k">return</span> <span class="p">(</span><span class="n">source_ix</span><span class="p">,</span> <span class="n">split_pos</span><span class="p">),</span> <span class="n">ratio</span></div>
+
+<div class="viewcode-block" id="ConcatRangeTracker.set_current_position"><a class="viewcode-back" href="../../../apache_beam.io.concat_source.html#apache_beam.io.concat_source.ConcatRangeTracker.set_current_position">[docs]</a> <span class="k">def</span> <span class="nf">set_current_position</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pos</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s1">'Should only be called on sub-trackers'</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="ConcatRangeTracker.position_at_fraction"><a class="viewcode-back" href="../../../apache_beam.io.concat_source.html#apache_beam.io.concat_source.ConcatRangeTracker.position_at_fraction">[docs]</a> <span class="k">def</span> <span class="nf">position_at_fraction</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fraction</span><span class="p">):</span>
+ <span class="n">source_ix</span><span class="p">,</span> <span class="n">source_frac</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">global_to_local</span><span class="p">(</span><span class="n">fraction</span><span class="p">)</span>
+ <span class="n">last</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_end</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_end</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="bp">se [...]
+ <span class="k">if</span> <span class="n">source_ix</span> <span class="o">==</span> <span class="n">last</span><span class="p">:</span>
+ <span class="k">return</span> <span class="p">(</span><span class="n">source_ix</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="p">(</span><span class="n">source_ix</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">sub_range_tracker</span><span class="p">(</span><span class="n">source_ix</span><span class="p">)</span><span class="o">.</span><span class="n">position_at_fraction</span><span class="p">(</span>
+ <span class="n">source_frac</span><span class="p">))</span></div>
+
+<div class="viewcode-block" id="ConcatRangeTracker.fraction_consumed"><a class="viewcode-back" href="../../../apache_beam.io.concat_source.html#apache_beam.io.concat_source.ConcatRangeTracker.fraction_consumed">[docs]</a> <span class="k">def</span> <span class="nf">fraction_consumed</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">_lock</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">local_to_global</span><span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_claimed_source_ix</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">sub_range_tracker</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_claimed_source_ix</span><span class="p">)</span>
+ <span class="o">.</span><span class="n">fraction_consumed</span><span class="p">())</span></div>
+
+<div class="viewcode-block" id="ConcatRangeTracker.local_to_global"><a class="viewcode-back" href="../../../apache_beam.io.concat_source.html#apache_beam.io.concat_source.ConcatRangeTracker.local_to_global">[docs]</a> <span class="k">def</span> <span class="nf">local_to_global</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source_ix</span><span class="p">,</span> <span class="n">source_frac</span><span class="p">):</span>
+ <span class="n">cw</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cumulative_weights</span>
+ <span class="c1"># The global fraction is the fraction to source_ix plus some portion of</span>
+ <span class="c1"># the way towards the next source.</span>
+ <span class="k">return</span> <span class="n">cw</span><span class="p">[</span><span class="n">source_ix</span><span class="p">]</span> <span class="o">+</span> <span class="n">source_frac</span> <span class="o">*</span> <span class="p">(</span><span class="n">cw</span><span class="p">[</span><span class="n">source_ix</span> <span class="o">+</span> <span class="mi">1</span><span class="p">]</span> <span class="o">-</span> <span class="n">cw</span><span class="p">[</span><span class= [...]
+
+<div class="viewcode-block" id="ConcatRangeTracker.global_to_local"><a class="viewcode-back" href="../../../apache_beam.io.concat_source.html#apache_beam.io.concat_source.ConcatRangeTracker.global_to_local">[docs]</a> <span class="k">def</span> <span class="nf">global_to_local</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">frac</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">frac</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
+ <span class="n">last</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_end</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_end</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="bp"> [...]
+ <span class="k">return</span> <span class="p">(</span><span class="n">last</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">cw</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cumulative_weights</span>
+ <span class="c1"># Find the last source that starts at or before frac.</span>
+ <span class="n">source_ix</span> <span class="o">=</span> <span class="n">bisect</span><span class="o">.</span><span class="n">bisect</span><span class="p">(</span><span class="n">cw</span><span class="p">,</span> <span class="n">frac</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span>
+ <span class="c1"># Return this source, converting what's left of frac after starting</span>
+ <span class="c1"># this source into a value in [0.0, 1.0) representing how far we are</span>
+ <span class="c1"># towards the next source.</span>
+ <span class="k">return</span> <span class="p">(</span><span class="n">source_ix</span><span class="p">,</span>
+ <span class="p">(</span><span class="n">frac</span> <span class="o">-</span> <span class="n">cw</span><span class="p">[</span><span class="n">source_ix</span><span class="p">])</span> <span class="o">/</span> <span class="p">(</span><span class="n">cw</span><span class="p">[</span><span class="n">source_ix</span> <span class="o">+</span> <span class="mi">1</span><span class="p">]</span> <span class="o">-</span> <span class="n">cw</span><span class="p">[</span><span class="n [...]
+
+<div class="viewcode-block" id="ConcatRangeTracker.sub_range_tracker"><a class="viewcode-back" href="../../../apache_beam.io.concat_source.html#apache_beam.io.concat_source.ConcatRangeTracker.sub_range_tracker">[docs]</a> <span class="k">def</span> <span class="nf">sub_range_tracker</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source_ix</span><span class="p">):</span>
+ <span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_start</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o"><=</span> <span class="n">source_ix</span> <span class="o"><=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_end</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_range_trackers</span><span class="p">[</span><span class="n">source_ix</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">_lock</span><span class="p">:</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_range_trackers</span><span class="p">[</span><span class="n">source_ix</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">source</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_source_bundles</span><span class="p">[</span><span class="n">source_ix</span><span class="p">]</span>
+ <span class="k">if</span> <span class="n">source_ix</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">_start</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_start</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <spa [...]
+ <span class="n">start</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_start</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">start</span> <span class="o">=</span> <span class="n">source</span><span class="o">.</span><span class="n">start_position</span>
+ <span class="k">if</span> <span class="n">source_ix</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">_end</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_end</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span cl [...]
+ <span class="n">stop</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_end</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">stop</span> <span class="o">=</span> <span class="n">source</span><span class="o">.</span><span class="n">stop_position</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_range_trackers</span><span class="p">[</span><span class="n">source_ix</span><span class="p">]</span> <span class="o">=</span> <span class="n">source</span><span class="o">.</span><span class="n">source</span><span class="o">.</span><span class="n">get_range_tracker</span><span class="p">(</span>
+ <span class="n">start</span><span class="p">,</span> <span class="n">stop</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_range_trackers</span><span class="p">[</span><span class="n">source_ix</span><span class="p">]</span></div></div>
+</pre></div>
+
+ </div>
+ <div class="articleComments">
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright .
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true,
+ SOURCELINK_SUFFIX: '.txt'
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/filebasedsink.html b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/filebasedsink.html
new file mode 100644
index 0000000..185af54
--- /dev/null
+++ b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/filebasedsink.html
@@ -0,0 +1,537 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>apache_beam.io.filebasedsink — Apache Beam documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="index" title="Index"
+ href="../../../genindex.html"/>
+ <link rel="search" title="Search" href="../../../search.html"/>
+ <link rel="top" title="Apache Beam documentation" href="../../../index.html"/>
+ <link rel="up" title="Module code" href="../../index.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Apache Beam
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.coders.html">apache_beam.coders package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.internal.html">apache_beam.internal package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.io.html">apache_beam.io package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.metrics.html">apache_beam.metrics package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.options.html">apache_beam.options package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.portability.html">apache_beam.portability package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.runners.html">apache_beam.runners package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.testing.html">apache_beam.testing package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.transforms.html">apache_beam.transforms package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.typehints.html">apache_beam.typehints package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.utils.html">apache_beam.utils package</a></li>
+</ul>
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.error.html">apache_beam.error module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.version.html">apache_beam.version module</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Apache Beam</a>
+
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+
+ <ul class="wy-breadcrumbs">
+
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li>apache_beam.io.filebasedsink</li>
+
+
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+
+ </ul>
+
+
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for apache_beam.io.filebasedsink</h1><div class="highlight"><pre>
+<span></span><span class="c1">#</span>
+<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
+<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
+<span class="c1"># this work for additional information regarding copyright ownership.</span>
+<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
+<span class="c1"># (the "License"); you may not use this file except in compliance with</span>
+<span class="c1"># the License. You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="c1">#</span>
+
+<span class="sd">"""File-based sink."""</span>
+
+<span class="kn">from</span> <span class="nn">__future__</span> <span class="k">import</span> <span class="n">absolute_import</span>
+
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">import</span> <span class="nn">os</span>
+<span class="kn">import</span> <span class="nn">re</span>
+<span class="kn">import</span> <span class="nn">time</span>
+<span class="kn">import</span> <span class="nn">uuid</span>
+
+<span class="kn">from</span> <span class="nn">apache_beam.internal</span> <span class="k">import</span> <span class="n">util</span>
+<span class="kn">from</span> <span class="nn">apache_beam.io</span> <span class="k">import</span> <span class="n">iobase</span>
+<span class="kn">from</span> <span class="nn">apache_beam.io.filesystem</span> <span class="k">import</span> <span class="n">BeamIOError</span>
+<span class="kn">from</span> <span class="nn">apache_beam.io.filesystem</span> <span class="k">import</span> <span class="n">CompressionTypes</span>
+<span class="kn">from</span> <span class="nn">apache_beam.io.filesystems</span> <span class="k">import</span> <span class="n">FileSystems</span>
+<span class="kn">from</span> <span class="nn">apache_beam.options.value_provider</span> <span class="k">import</span> <span class="n">StaticValueProvider</span>
+<span class="kn">from</span> <span class="nn">apache_beam.options.value_provider</span> <span class="k">import</span> <span class="n">ValueProvider</span>
+<span class="kn">from</span> <span class="nn">apache_beam.options.value_provider</span> <span class="k">import</span> <span class="n">check_accessible</span>
+<span class="kn">from</span> <span class="nn">apache_beam.transforms.display</span> <span class="k">import</span> <span class="n">DisplayDataItem</span>
+
+<span class="n">DEFAULT_SHARD_NAME_TEMPLATE</span> <span class="o">=</span> <span class="s1">'-SSSSS-of-NNNNN'</span>
+
+<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'FileBasedSink'</span><span class="p">]</span>
+
+
+<div class="viewcode-block" id="FileBasedSink"><a class="viewcode-back" href="../../../apache_beam.io.filebasedsink.html#apache_beam.io.filebasedsink.FileBasedSink">[docs]</a><span class="k">class</span> <span class="nc">FileBasedSink</span><span class="p">(</span><span class="n">iobase</span><span class="o">.</span><span class="n">Sink</span><span class="p">):</span>
+ <span class="sd">"""A sink to GCS or local files.</span>
+
+<span class="sd"> To implement a file-based sink, extend this class and override</span>
+<span class="sd"> either :meth:`.write_record()` or :meth:`.write_encoded_record()`.</span>
+
+<span class="sd"> If needed, also override :meth:`.open()` and/or :meth:`.close()` to customize</span>
+<span class="sd"> the file handling or write headers and footers.</span>
+
+<span class="sd"> The output of this write is a :class:`~apache_beam.pvalue.PCollection` of</span>
+<span class="sd"> all written shards.</span>
+<span class="sd"> """</span>
+
+ <span class="c1"># Max number of threads to be used for renaming.</span>
+ <span class="n">_MAX_RENAME_THREADS</span> <span class="o">=</span> <span class="mi">64</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
+ <span class="n">file_path_prefix</span><span class="p">,</span>
+ <span class="n">coder</span><span class="p">,</span>
+ <span class="n">file_name_suffix</span><span class="o">=</span><span class="s1">''</span><span class="p">,</span>
+ <span class="n">num_shards</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
+ <span class="n">shard_name_template</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
+ <span class="n">mime_type</span><span class="o">=</span><span class="s1">'application/octet-stream'</span><span class="p">,</span>
+ <span class="n">compression_type</span><span class="o">=</span><span class="n">CompressionTypes</span><span class="o">.</span><span class="n">AUTO</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Raises:</span>
+<span class="sd"> ~exceptions.TypeError: if file path parameters are not a :class:`str` or</span>
+<span class="sd"> :class:`~apache_beam.options.value_provider.ValueProvider`, or if</span>
+<span class="sd"> **compression_type** is not a member of</span>
+<span class="sd"> :class:`~apache_beam.io.filesystem.CompressionTypes`.</span>
+<span class="sd"> ~exceptions.ValueError: if **shard_name_template** is not of expected</span>
+<span class="sd"> format.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">file_path_prefix</span><span class="p">,</span> <span class="p">(</span><span class="n">basestring</span><span class="p">,</span> <span class="n">ValueProvider</span><span class="p">)):</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'file_path_prefix must be a string or ValueProvider;'</span>
+ <span class="s1">'got </span><span class="si">%r</span><span class="s1"> instead'</span> <span class="o">%</span> <span class="n">file_path_prefix</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">file_name_suffix</span><span class="p">,</span> <span class="p">(</span><span class="n">basestring</span><span class="p">,</span> <span class="n">ValueProvider</span><span class="p">)):</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'file_name_suffix must be a string or ValueProvider;'</span>
+ <span class="s1">'got </span><span class="si">%r</span><span class="s1"> instead'</span> <span class="o">%</span> <span class="n">file_name_suffix</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">is_valid_compression_type</span><span class="p">(</span><span class="n">compression_type</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'compression_type must be CompressionType object but '</span>
+ <span class="s1">'was </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">compression_type</span><span class="p">))</span>
+ <span class="k">if</span> <span class="n">shard_name_template</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">shard_name_template</span> <span class="o">=</span> <span class="n">DEFAULT_SHARD_NAME_TEMPLATE</span>
+ <span class="k">elif</span> <span class="n">shard_name_template</span> <span class="o">==</span> <span class="s1">''</span><span class="p">:</span>
+ <span class="n">num_shards</span> <span class="o">=</span> <span class="mi">1</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">file_path_prefix</span><span class="p">,</span> <span class="n">basestring</span><span class="p">):</span>
+ <span class="n">file_path_prefix</span> <span class="o">=</span> <span class="n">StaticValueProvider</span><span class="p">(</span><span class="nb">str</span><span class="p">,</span> <span class="n">file_path_prefix</span><span class="p">)</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">file_name_suffix</span><span class="p">,</span> <span class="n">basestring</span><span class="p">):</span>
+ <span class="n">file_name_suffix</span> <span class="o">=</span> <span class="n">StaticValueProvider</span><span class="p">(</span><span class="nb">str</span><span class="p">,</span> <span class="n">file_name_suffix</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">file_path_prefix</span> <span class="o">=</span> <span class="n">file_path_prefix</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">file_name_suffix</span> <span class="o">=</span> <span class="n">file_name_suffix</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">num_shards</span> <span class="o">=</span> <span class="n">num_shards</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">coder</span> <span class="o">=</span> <span class="n">coder</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">shard_name_format</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_template_to_format</span><span class="p">(</span><span class="n">shard_name_template</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">compression_type</span> <span class="o">=</span> <span class="n">compression_type</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">mime_type</span> <span class="o">=</span> <span class="n">mime_type</span>
+
+<div class="viewcode-block" id="FileBasedSink.display_data"><a class="viewcode-back" href="../../../apache_beam.io.filebasedsink.html#apache_beam.io.filebasedsink.FileBasedSink.display_data">[docs]</a> <span class="k">def</span> <span class="nf">display_data</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">{</span><span class="s1">'shards'</span><span class="p">:</span>
+ <span class="n">DisplayDataItem</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">num_shards</span><span class="p">,</span>
+ <span class="n">label</span><span class="o">=</span><span class="s1">'Number of Shards'</span><span class="p">)</span><span class="o">.</span><span class="n">drop_if_default</span><span class="p">(</span><span class="mi">0</span><span class="p">),</span>
+ <span class="s1">'compression'</span><span class="p">:</span>
+ <span class="n">DisplayDataItem</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">compression_type</span><span class="p">)),</span>
+ <span class="s1">'file_pattern'</span><span class="p">:</span>
+ <span class="n">DisplayDataItem</span><span class="p">(</span><span class="s1">'</span><span class="si">{}{}{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">file_path_prefix</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">shard_name_format</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">file_name_suffix</span><span class="p">),</span>
+ <span class="n">label</span><span class="o">=</span><span class="s1">'File Pattern'</span><span class="p">)}</span></div>
+
+<div class="viewcode-block" id="FileBasedSink.open"><a class="viewcode-back" href="../../../apache_beam.io.filebasedsink.html#apache_beam.io.filebasedsink.FileBasedSink.open">[docs]</a> <span class="nd">@check_accessible</span><span class="p">([</span><span class="s1">'file_path_prefix'</span><span class="p">])</span>
+ <span class="k">def</span> <span class="nf">open</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">temp_path</span><span class="p">):</span>
+ <span class="sd">"""Opens ``temp_path``, returning an opaque file handle object.</span>
+
+<span class="sd"> The returned file handle is passed to ``write_[encoded_]record`` and</span>
+<span class="sd"> ``close``.</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">temp_path</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">mime_type</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">compression_type</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="FileBasedSink.write_record"><a class="viewcode-back" href="../../../apache_beam.io.filebasedsink.html#apache_beam.io.filebasedsink.FileBasedSink.write_record">[docs]</a> <span class="k">def</span> <span class="nf">write_record</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file_handle</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
+ <span class="sd">"""Writes a single record to the file handle returned by ``open()``.</span>
+
+<span class="sd"> By default, calls ``write_encoded_record`` after encoding the record with</span>
+<span class="sd"> this sink's Coder.</span>
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">write_encoded_record</span><span class="p">(</span><span class="n">file_handle</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">coder</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">value</span><span class="p">))</span></div>
+
+<div class="viewcode-block" id="FileBasedSink.write_encoded_record"><a class="viewcode-back" href="../../../apache_beam.io.filebasedsink.html#apache_beam.io.filebasedsink.FileBasedSink.write_encoded_record">[docs]</a> <span class="k">def</span> <span class="nf">write_encoded_record</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file_handle</span><span class="p">,</span> <span class="n">encoded_value</span><span class="p">):</span>
+ <span class="sd">"""Writes a single encoded record to the file handle returned by ``open()``.</span>
+<span class="sd"> """</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
+
+<div class="viewcode-block" id="FileBasedSink.close"><a class="viewcode-back" href="../../../apache_beam.io.filebasedsink.html#apache_beam.io.filebasedsink.FileBasedSink.close">[docs]</a> <span class="k">def</span> <span class="nf">close</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file_handle</span><span class="p">):</span>
+ <span class="sd">"""Finalize and close the file handle returned from ``open()``.</span>
+
+<span class="sd"> Called after all records are written.</span>
+
+<span class="sd"> By default, calls ``file_handle.close()`` iff it is not None.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="n">file_handle</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">file_handle</span><span class="o">.</span><span class="n">close</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="FileBasedSink.initialize_write"><a class="viewcode-back" href="../../../apache_beam.io.filebasedsink.html#apache_beam.io.filebasedsink.FileBasedSink.initialize_write">[docs]</a> <span class="nd">@check_accessible</span><span class="p">([</span><span class="s1">'file_path_prefix'</span><span class="p">,</span> <span class="s1">'file_name_suffix'</span><span class="p">])</span>
+ <span class="k">def</span> <span class="nf">initialize_write</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">file_path_prefix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_path_prefix</span><span class="o">.</span><span class="n">get</span><span class="p">()</span>
+
+ <span class="n">tmp_dir</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_create_temp_dir</span><span class="p">(</span><span class="n">file_path_prefix</span><span class="p">)</span>
+ <span class="n">FileSystems</span><span class="o">.</span><span class="n">mkdirs</span><span class="p">(</span><span class="n">tmp_dir</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">tmp_dir</span></div>
+
+ <span class="k">def</span> <span class="nf">_create_temp_dir</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file_path_prefix</span><span class="p">):</span>
+ <span class="n">base_path</span><span class="p">,</span> <span class="n">last_component</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">file_path_prefix</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">last_component</span><span class="p">:</span>
+ <span class="c1"># Trying to re-split the base_path to check if it's a root.</span>
+ <span class="n">new_base_path</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">base_path</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">base_path</span> <span class="o">==</span> <span class="n">new_base_path</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Cannot create a temporary directory for root path '</span>
+ <span class="s1">'prefix </span><span class="si">%s</span><span class="s1">. Please specify a file path prefix with '</span>
+ <span class="s1">'at least two components.'</span><span class="p">,</span>
+ <span class="n">file_path_prefix</span><span class="p">)</span>
+ <span class="n">path_components</span> <span class="o">=</span> <span class="p">[</span><span class="n">base_path</span><span class="p">,</span>
+ <span class="s1">'beam-temp-'</span> <span class="o">+</span> <span class="n">last_component</span> <span class="o">+</span> <span class="s1">'-'</span> <span class="o">+</span> <span class="n">uuid</span><span class="o">.</span><span class="n">uuid1</span><span class="p">()</span><span class="o">.</span><span class="n">hex</span><span class="p">]</span>
+ <span class="k">return</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="o">*</span><span class="n">path_components</span><span class="p">)</span>
+
+<div class="viewcode-block" id="FileBasedSink.open_writer"><a class="viewcode-back" href="../../../apache_beam.io.filebasedsink.html#apache_beam.io.filebasedsink.FileBasedSink.open_writer">[docs]</a> <span class="nd">@check_accessible</span><span class="p">([</span><span class="s1">'file_path_prefix'</span><span class="p">,</span> <span class="s1">'file_name_suffix'</span><span class="p">])</span>
+ <span class="k">def</span> <span class="nf">open_writer</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">init_result</span><span class="p">,</span> <span class="n">uid</span><span class="p">):</span>
+ <span class="c1"># A proper suffix is needed for AUTO compression detection.</span>
+ <span class="c1"># We also ensure there will be no collisions with uid and a</span>
+ <span class="c1"># (possibly unsharded) file_path_prefix and a (possibly empty)</span>
+ <span class="c1"># file_name_suffix.</span>
+ <span class="n">file_path_prefix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_path_prefix</span><span class="o">.</span><span class="n">get</span><span class="p">()</span>
+ <span class="n">file_name_suffix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_name_suffix</span><span class="o">.</span><span class="n">get</span><span class="p">()</span>
+ <span class="n">suffix</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="s1">'.'</span> <span class="o">+</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">basename</span><span class="p">(</span><span class="n">file_path_prefix</span><span class="p">)</span> <span class="o">+</span> <span class="n">file_name_suffix</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">FileBasedSinkWriter</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">init_result</span><span class="p">,</span> <span class="n">uid</span><span class="p">)</span> <span class="o">+</span> <span class="n">suffix</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="FileBasedSink.finalize_write"><a class="viewcode-back" href="../../../apache_beam.io.filebasedsink.html#apache_beam.io.filebasedsink.FileBasedSink.finalize_write">[docs]</a> <span class="nd">@check_accessible</span><span class="p">([</span><span class="s1">'file_path_prefix'</span><span class="p">,</span> <span class="s1">'file_name_suffix'</span><span class="p">])</span>
+ <span class="k">def</span> <span class="nf">finalize_write</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">init_result</span><span class="p">,</span> <span class="n">writer_results</span><span class="p">):</span>
+ <span class="n">file_path_prefix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_path_prefix</span><span class="o">.</span><span class="n">get</span><span class="p">()</span>
+ <span class="n">file_name_suffix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_name_suffix</span><span class="o">.</span><span class="n">get</span><span class="p">()</span>
+ <span class="n">writer_results</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">writer_results</span><span class="p">)</span>
+ <span class="n">num_shards</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">writer_results</span><span class="p">)</span>
+ <span class="n">min_threads</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="n">num_shards</span><span class="p">,</span> <span class="n">FileBasedSink</span><span class="o">.</span><span class="n">_MAX_RENAME_THREADS</span><span class="p">)</span>
+ <span class="n">num_threads</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">min_threads</span><span class="p">)</span>
+
+ <span class="n">source_files</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="n">destination_files</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="n">chunk_size</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">get_chunk_size</span><span class="p">(</span><span class="n">file_path_prefix</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">shard_num</span><span class="p">,</span> <span class="n">shard</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">writer_results</span><span class="p">):</span>
+ <span class="n">final_name</span> <span class="o">=</span> <span class="s1">''</span><span class="o">.</span><span class="n">join</span><span class="p">([</span>
+ <span class="n">file_path_prefix</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">shard_name_format</span> <span class="o">%</span> <span class="nb">dict</span><span class="p">(</span>
+ <span class="n">shard_num</span><span class="o">=</span><span class="n">shard_num</span><span class="p">,</span> <span class="n">num_shards</span><span class="o">=</span><span class="n">num_shards</span><span class="p">),</span> <span class="n">file_name_suffix</span>
+ <span class="p">])</span>
+ <span class="n">source_files</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">shard</span><span class="p">)</span>
+ <span class="n">destination_files</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">final_name</span><span class="p">)</span>
+
+ <span class="n">source_file_batch</span> <span class="o">=</span> <span class="p">[</span><span class="n">source_files</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">i</span> <span class="o">+</span> <span class="n">chunk_size</span><span class="p">]</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">source_files</span><span class="p">),</span>
+ <span class="n">chunk_size</span><span class="p">)]</span>
+ <span class="n">destination_file_batch</span> <span class="o">=</span> <span class="p">[</span><span class="n">destination_files</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">i</span> <span class="o">+</span> <span class="n">chunk_size</span><span class="p">]</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">destination_files</span><span class="p">),</span>
+ <span class="n">chunk_size</span><span class="p">)]</span>
+
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
+ <span class="s1">'Starting finalize_write threads with num_shards: </span><span class="si">%d</span><span class="s1">, '</span>
+ <span class="s1">'batches: </span><span class="si">%d</span><span class="s1">, num_threads: </span><span class="si">%d</span><span class="s1">'</span><span class="p">,</span>
+ <span class="n">num_shards</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">source_file_batch</span><span class="p">),</span> <span class="n">num_threads</span><span class="p">)</span>
+ <span class="n">start_time</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span>
+
+ <span class="c1"># Use a thread pool for renaming operations.</span>
+ <span class="k">def</span> <span class="nf">_rename_batch</span><span class="p">(</span><span class="n">batch</span><span class="p">):</span>
+ <span class="sd">"""_rename_batch executes batch rename operations."""</span>
+ <span class="n">source_files</span><span class="p">,</span> <span class="n">destination_files</span> <span class="o">=</span> <span class="n">batch</span>
+ <span class="n">exceptions</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">FileSystems</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">source_files</span><span class="p">,</span> <span class="n">destination_files</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">exceptions</span>
+ <span class="k">except</span> <span class="n">BeamIOError</span> <span class="k">as</span> <span class="n">exp</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">exp</span><span class="o">.</span><span class="n">exception_details</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="k">raise</span>
+ <span class="k">for</span> <span class="p">(</span><span class="n">src</span><span class="p">,</span> <span class="n">dest</span><span class="p">),</span> <span class="n">exception</span> <span class="ow">in</span> <span class="n">exp</span><span class="o">.</span><span class="n">exception_details</span><span class="o">.</span><span class="n">iteritems</span><span class="p">():</span>
+ <span class="k">if</span> <span class="n">exception</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s1">'Rename not successful: </span><span class="si">%s</span><span class="s1"> -> </span><span class="si">%s</span><span class="s1">, </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">src</span><span class="p">,</span> <span class="n">dest</span><span class="p">,</span>
+ <span class="n">exception</span><span class="p">)</span>
+ <span class="n">should_report</span> <span class="o">=</span> <span class="kc">True</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">exception</span><span class="p">,</span> <span class="ne">IOError</span><span class="p">):</span>
+ <span class="c1"># May have already been copied.</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">dest</span><span class="p">):</span>
+ <span class="n">should_report</span> <span class="o">=</span> <span class="kc">False</span>
+ <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">exists_e</span><span class="p">:</span> <span class="c1"># pylint: disable=broad-except</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s1">'Exception when checking if file </span><span class="si">%s</span><span class="s1"> exists: '</span>
+ <span class="s1">'</span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">dest</span><span class="p">,</span> <span class="n">exists_e</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">should_report</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">((</span><span class="s1">'Exception in _rename_batch. src: </span><span class="si">%s</span><span class="s1">, '</span>
+ <span class="s1">'dest: </span><span class="si">%s</span><span class="s1">, err: </span><span class="si">%s</span><span class="s1">'</span><span class="p">),</span> <span class="n">src</span><span class="p">,</span> <span class="n">dest</span><span class="p">,</span> <span class="n">exception</span><span class="p">)</span>
+ <span class="n">exceptions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">exception</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'Rename successful: </span><span class="si">%s</span><span class="s1"> -> </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">src</span><span class="p">,</span> <span class="n">dest</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">exceptions</span>
+
+ <span class="n">exception_batches</span> <span class="o">=</span> <span class="n">util</span><span class="o">.</span><span class="n">run_using_threadpool</span><span class="p">(</span>
+ <span class="n">_rename_batch</span><span class="p">,</span> <span class="nb">zip</span><span class="p">(</span><span class="n">source_file_batch</span><span class="p">,</span> <span class="n">destination_file_batch</span><span class="p">),</span>
+ <span class="n">num_threads</span><span class="p">)</span>
+
+ <span class="n">all_exceptions</span> <span class="o">=</span> <span class="p">[</span><span class="n">e</span> <span class="k">for</span> <span class="n">exception_batch</span> <span class="ow">in</span> <span class="n">exception_batches</span>
+ <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">exception_batch</span><span class="p">]</span>
+ <span class="k">if</span> <span class="n">all_exceptions</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s1">'Encountered exceptions in finalize_write: </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span>
+ <span class="n">all_exceptions</span><span class="p">)</span>
+
+ <span class="k">for</span> <span class="n">final_name</span> <span class="ow">in</span> <span class="n">destination_files</span><span class="p">:</span>
+ <span class="k">yield</span> <span class="n">final_name</span>
+
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Renamed </span><span class="si">%d</span><span class="s1"> shards in </span><span class="si">%.2f</span><span class="s1"> seconds.'</span><span class="p">,</span> <span class="n">num_shards</span><span class="p">,</span>
+ <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">start_time</span><span class="p">)</span>
+
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">FileSystems</span><span class="o">.</span><span class="n">delete</span><span class="p">([</span><span class="n">init_result</span><span class="p">])</span>
+ <span class="k">except</span> <span class="ne">IOError</span><span class="p">:</span>
+ <span class="c1"># May have already been removed.</span>
+ <span class="k">pass</span></div>
+
+ <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">_template_to_format</span><span class="p">(</span><span class="n">shard_name_template</span><span class="p">):</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">shard_name_template</span><span class="p">:</span>
+ <span class="k">return</span> <span class="s1">''</span>
+ <span class="n">m</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">'S+'</span><span class="p">,</span> <span class="n">shard_name_template</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">m</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Shard number pattern S+ not found in template '</span><span class="si">%s</span><span class="s2">'"</span> <span class="o">%</span>
+ <span class="n">shard_name_template</span><span class="p">)</span>
+ <span class="n">shard_name_format</span> <span class="o">=</span> <span class="n">shard_name_template</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span>
+ <span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="s1">'</span><span class="si">%%</span><span class="s1">(shard_num)0</span><span class="si">%d</span><span class="s1">d'</span> <span class="o">%</span> <span class="nb">len</span><span class="p">(</span><span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span cla [...]
+ <span class="n">m</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">'N+'</span><span class="p">,</span> <span class="n">shard_name_format</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">m</span><span class="p">:</span>
+ <span class="n">shard_name_format</span> <span class="o">=</span> <span class="n">shard_name_format</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span>
+ <span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="s1">'</span><span class="si">%%</span><span class="s1">(num_shards)0</span><span class="si">%d</span><span class="s1">d'</span> <span class="o">%</span> <span class="nb">len</span><span class="p">(</span><span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span [...]
+ <span class="k">return</span> <span class="n">shard_name_format</span>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="c1"># TODO: Clean up workitem_test which uses this.</span>
+ <span class="c1"># pylint: disable=unidiomatic-typecheck</span>
+ <span class="k">return</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">==</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">)</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="vm">__dict__</span></div>
+
+
+<span class="k">class</span> <span class="nc">FileBasedSinkWriter</span><span class="p">(</span><span class="n">iobase</span><span class="o">.</span><span class="n">Writer</span><span class="p">):</span>
+ <span class="sd">"""The writer for FileBasedSink.</span>
+<span class="sd"> """</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sink</span><span class="p">,</span> <span class="n">temp_shard_path</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">sink</span> <span class="o">=</span> <span class="n">sink</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">temp_shard_path</span> <span class="o">=</span> <span class="n">temp_shard_path</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">temp_handle</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sink</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="n">temp_shard_path</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">write</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">sink</span><span class="o">.</span><span class="n">write_record</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">temp_handle</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">close</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">sink</span><span class="o">.</span><span class="n">close</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">temp_handle</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">temp_shard_path</span>
+</pre></div>
+
+ </div>
+ <div class="articleComments">
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright .
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true,
+ SOURCELINK_SUFFIX: '.txt'
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/filebasedsource.html b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/filebasedsource.html
new file mode 100644
index 0000000..0478669
--- /dev/null
+++ b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/filebasedsource.html
@@ -0,0 +1,649 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>apache_beam.io.filebasedsource — Apache Beam documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="index" title="Index"
+ href="../../../genindex.html"/>
+ <link rel="search" title="Search" href="../../../search.html"/>
+ <link rel="top" title="Apache Beam documentation" href="../../../index.html"/>
+ <link rel="up" title="Module code" href="../../index.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Apache Beam
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.coders.html">apache_beam.coders package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.internal.html">apache_beam.internal package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.io.html">apache_beam.io package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.metrics.html">apache_beam.metrics package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.options.html">apache_beam.options package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.portability.html">apache_beam.portability package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.runners.html">apache_beam.runners package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.testing.html">apache_beam.testing package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.transforms.html">apache_beam.transforms package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.typehints.html">apache_beam.typehints package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.utils.html">apache_beam.utils package</a></li>
+</ul>
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.error.html">apache_beam.error module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.version.html">apache_beam.version module</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Apache Beam</a>
+
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+
+ <ul class="wy-breadcrumbs">
+
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li>apache_beam.io.filebasedsource</li>
+
+
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+
+ </ul>
+
+
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for apache_beam.io.filebasedsource</h1><div class="highlight"><pre>
+<span></span><span class="c1">#</span>
+<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
+<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
+<span class="c1"># this work for additional information regarding copyright ownership.</span>
+<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
+<span class="c1"># (the "License"); you may not use this file except in compliance with</span>
+<span class="c1"># the License. You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="c1">#</span>
+
+<span class="sd">"""A framework for developing sources for new file types.</span>
+
+<span class="sd">To create a source for a new file type a sub-class of :class:`FileBasedSource`</span>
+<span class="sd">should be created. Sub-classes of :class:`FileBasedSource` must implement the</span>
+<span class="sd">method :meth:`FileBasedSource.read_records()`. Please read the documentation of</span>
+<span class="sd">that method for more details.</span>
+
+<span class="sd">For an example implementation of :class:`FileBasedSource` see</span>
+<span class="sd">:class:`~apache_beam.io._AvroSource`.</span>
+<span class="sd">"""</span>
+
+<span class="kn">from</span> <span class="nn">six</span> <span class="k">import</span> <span class="n">integer_types</span>
+
+<span class="kn">from</span> <span class="nn">apache_beam.internal</span> <span class="k">import</span> <span class="n">pickler</span>
+<span class="kn">from</span> <span class="nn">apache_beam.io</span> <span class="k">import</span> <span class="n">concat_source</span>
+<span class="kn">from</span> <span class="nn">apache_beam.io</span> <span class="k">import</span> <span class="n">iobase</span>
+<span class="kn">from</span> <span class="nn">apache_beam.io</span> <span class="k">import</span> <span class="n">range_trackers</span>
+<span class="kn">from</span> <span class="nn">apache_beam.io.filesystem</span> <span class="k">import</span> <span class="n">CompressionTypes</span>
+<span class="kn">from</span> <span class="nn">apache_beam.io.filesystems</span> <span class="k">import</span> <span class="n">FileSystems</span>
+<span class="kn">from</span> <span class="nn">apache_beam.io.restriction_trackers</span> <span class="k">import</span> <span class="n">OffsetRange</span>
+<span class="kn">from</span> <span class="nn">apache_beam.options.value_provider</span> <span class="k">import</span> <span class="n">StaticValueProvider</span>
+<span class="kn">from</span> <span class="nn">apache_beam.options.value_provider</span> <span class="k">import</span> <span class="n">ValueProvider</span>
+<span class="kn">from</span> <span class="nn">apache_beam.options.value_provider</span> <span class="k">import</span> <span class="n">check_accessible</span>
+<span class="kn">from</span> <span class="nn">apache_beam.transforms.core</span> <span class="k">import</span> <span class="n">DoFn</span>
+<span class="kn">from</span> <span class="nn">apache_beam.transforms.core</span> <span class="k">import</span> <span class="n">ParDo</span>
+<span class="kn">from</span> <span class="nn">apache_beam.transforms.core</span> <span class="k">import</span> <span class="n">PTransform</span>
+<span class="kn">from</span> <span class="nn">apache_beam.transforms.display</span> <span class="k">import</span> <span class="n">DisplayDataItem</span>
+<span class="kn">from</span> <span class="nn">apache_beam.transforms.util</span> <span class="k">import</span> <span class="n">Reshuffle</span>
+
+<span class="n">MAX_NUM_THREADS_FOR_SIZE_ESTIMATION</span> <span class="o">=</span> <span class="mi">25</span>
+
+<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'FileBasedSource'</span><span class="p">]</span>
+
+
+<div class="viewcode-block" id="FileBasedSource"><a class="viewcode-back" href="../../../apache_beam.io.filebasedsource.html#apache_beam.io.filebasedsource.FileBasedSource">[docs]</a><span class="k">class</span> <span class="nc">FileBasedSource</span><span class="p">(</span><span class="n">iobase</span><span class="o">.</span><span class="n">BoundedSource</span><span class="p">):</span>
+ <span class="sd">"""A :class:`~apache_beam.io.iobase.BoundedSource` for reading a file glob of</span>
+<span class="sd"> a given type."""</span>
+
+ <span class="n">MIN_NUMBER_OF_FILES_TO_STAT</span> <span class="o">=</span> <span class="mi">100</span>
+ <span class="n">MIN_FRACTION_OF_FILES_TO_STAT</span> <span class="o">=</span> <span class="mf">0.01</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
+ <span class="n">file_pattern</span><span class="p">,</span>
+ <span class="n">min_bundle_size</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
+ <span class="n">compression_type</span><span class="o">=</span><span class="n">CompressionTypes</span><span class="o">.</span><span class="n">AUTO</span><span class="p">,</span>
+ <span class="n">splittable</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
+ <span class="n">validate</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
+ <span class="sd">"""Initializes :class:`FileBasedSource`.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> file_pattern (str): the file glob to read a string or a</span>
+<span class="sd"> :class:`~apache_beam.options.value_provider.ValueProvider`</span>
+<span class="sd"> (placeholder to inject a runtime value).</span>
+<span class="sd"> min_bundle_size (str): minimum size of bundles that should be generated</span>
+<span class="sd"> when performing initial splitting on this source.</span>
+<span class="sd"> compression_type (str): Used to handle compressed output files.</span>
+<span class="sd"> Typical value is :attr:`CompressionTypes.AUTO</span>
+<span class="sd"> <apache_beam.io.filesystem.CompressionTypes.AUTO>`,</span>
+<span class="sd"> in which case the final file path's extension will be used to detect</span>
+<span class="sd"> the compression.</span>
+<span class="sd"> splittable (bool): whether :class:`FileBasedSource` should try to</span>
+<span class="sd"> logically split a single file into data ranges so that different parts</span>
+<span class="sd"> of the same file can be read in parallel. If set to :data:`False`,</span>
+<span class="sd"> :class:`FileBasedSource` will prevent both initial and dynamic splitting</span>
+<span class="sd"> of sources for single files. File patterns that represent multiple files</span>
+<span class="sd"> may still get split into sources for individual files. Even if set to</span>
+<span class="sd"> :data:`True` by the user, :class:`FileBasedSource` may choose to not</span>
+<span class="sd"> split the file, for example, for compressed files where currently it is</span>
+<span class="sd"> not possible to efficiently read a data range without decompressing the</span>
+<span class="sd"> whole file.</span>
+<span class="sd"> validate (bool): Boolean flag to verify that the files exist during the</span>
+<span class="sd"> pipeline creation time.</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ~exceptions.TypeError: when **compression_type** is not valid or if</span>
+<span class="sd"> **file_pattern** is not a :class:`str` or a</span>
+<span class="sd"> :class:`~apache_beam.options.value_provider.ValueProvider`.</span>
+<span class="sd"> ~exceptions.ValueError: when compression and splittable files are</span>
+<span class="sd"> specified.</span>
+<span class="sd"> ~exceptions.IOError: when the file pattern specified yields an empty</span>
+<span class="sd"> result.</span>
+<span class="sd"> """</span>
+
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">file_pattern</span><span class="p">,</span> <span class="p">(</span><span class="n">basestring</span><span class="p">,</span> <span class="n">ValueProvider</span><span class="p">)):</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'</span><span class="si">%s</span><span class="s1">: file_pattern must be of type string'</span>
+ <span class="s1">' or ValueProvider; got </span><span class="si">%r</span><span class="s1"> instead'</span>
+ <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span> <span class="n">file_pattern</span><span class="p">))</span>
+
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">file_pattern</span><span class="p">,</span> <span class="n">basestring</span><span class="p">):</span>
+ <span class="n">file_pattern</span> <span class="o">=</span> <span class="n">StaticValueProvider</span><span class="p">(</span><span class="nb">str</span><span class="p">,</span> <span class="n">file_pattern</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_pattern</span> <span class="o">=</span> <span class="n">file_pattern</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">_concat_source</span> <span class="o">=</span> <span class="kc">None</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_min_bundle_size</span> <span class="o">=</span> <span class="n">min_bundle_size</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">is_valid_compression_type</span><span class="p">(</span><span class="n">compression_type</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'compression_type must be CompressionType object but '</span>
+ <span class="s1">'was </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">compression_type</span><span class="p">))</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_compression_type</span> <span class="o">=</span> <span class="n">compression_type</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_splittable</span> <span class="o">=</span> <span class="n">splittable</span>
+ <span class="k">if</span> <span class="n">validate</span> <span class="ow">and</span> <span class="n">file_pattern</span><span class="o">.</span><span class="n">is_accessible</span><span class="p">():</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_validate</span><span class="p">()</span>
+
+<div class="viewcode-block" id="FileBasedSource.display_data"><a class="viewcode-back" href="../../../apache_beam.io.filebasedsource.html#apache_beam.io.filebasedsource.FileBasedSource.display_data">[docs]</a> <span class="k">def</span> <span class="nf">display_data</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">{</span><span class="s1">'file_pattern'</span><span class="p">:</span> <span class="n">DisplayDataItem</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_pattern</span><span class="p">),</span>
+ <span class="n">label</span><span class="o">=</span><span class="s2">"File Pattern"</span><span class="p">),</span>
+ <span class="s1">'compression'</span><span class="p">:</span> <span class="n">DisplayDataItem</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_compression_type</span><span class="p">),</span>
+ <span class="n">label</span><span class="o">=</span><span class="s1">'Compression Type'</span><span class="p">)}</span></div>
+
+ <span class="nd">@check_accessible</span><span class="p">([</span><span class="s1">'_pattern'</span><span class="p">])</span>
+ <span class="k">def</span> <span class="nf">_get_concat_source</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_concat_source</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">pattern</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_pattern</span><span class="o">.</span><span class="n">get</span><span class="p">()</span>
+
+ <span class="n">single_file_sources</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="n">match_result</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">match</span><span class="p">([</span><span class="n">pattern</span><span class="p">])[</span><span class="mi">0</span><span class="p">]</span>
+ <span class="n">files_metadata</span> <span class="o">=</span> <span class="n">match_result</span><span class="o">.</span><span class="n">metadata_list</span>
+
+ <span class="c1"># We create a reference for FileBasedSource that will be serialized along</span>
+ <span class="c1"># with each _SingleFileSource. To prevent this FileBasedSource from having</span>
+ <span class="c1"># a reference to ConcatSource (resulting in quadratic space complexity)</span>
+ <span class="c1"># we clone it here.</span>
+ <span class="n">file_based_source_ref</span> <span class="o">=</span> <span class="n">pickler</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">pickler</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span>
+
+ <span class="k">for</span> <span class="n">file_metadata</span> <span class="ow">in</span> <span class="n">files_metadata</span><span class="p">:</span>
+ <span class="n">file_name</span> <span class="o">=</span> <span class="n">file_metadata</span><span class="o">.</span><span class="n">path</span>
+ <span class="n">file_size</span> <span class="o">=</span> <span class="n">file_metadata</span><span class="o">.</span><span class="n">size_in_bytes</span>
+ <span class="k">if</span> <span class="n">file_size</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">continue</span> <span class="c1"># Ignoring empty file.</span>
+
+ <span class="c1"># We determine splittability of this specific file.</span>
+ <span class="n">splittable</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">splittable</span> <span class="ow">and</span>
+ <span class="n">_determine_splittability_from_compression_type</span><span class="p">(</span>
+ <span class="n">file_name</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compression_type</span><span class="p">))</span>
+
+ <span class="n">single_file_source</span> <span class="o">=</span> <span class="n">_SingleFileSource</span><span class="p">(</span>
+ <span class="n">file_based_source_ref</span><span class="p">,</span> <span class="n">file_name</span><span class="p">,</span>
+ <span class="mi">0</span><span class="p">,</span>
+ <span class="n">file_size</span><span class="p">,</span>
+ <span class="n">min_bundle_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_min_bundle_size</span><span class="p">,</span>
+ <span class="n">splittable</span><span class="o">=</span><span class="n">splittable</span><span class="p">)</span>
+ <span class="n">single_file_sources</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">single_file_source</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_concat_source</span> <span class="o">=</span> <span class="n">concat_source</span><span class="o">.</span><span class="n">ConcatSource</span><span class="p">(</span><span class="n">single_file_sources</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_concat_source</span>
+
+<div class="viewcode-block" id="FileBasedSource.open_file"><a class="viewcode-back" href="../../../apache_beam.io.filebasedsource.html#apache_beam.io.filebasedsource.FileBasedSource.open_file">[docs]</a> <span class="k">def</span> <span class="nf">open_file</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file_name</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">open</span><span class="p">(</span>
+ <span class="n">file_name</span><span class="p">,</span> <span class="s1">'application/octet-stream'</span><span class="p">,</span>
+ <span class="n">compression_type</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_compression_type</span><span class="p">)</span></div>
+
+ <span class="nd">@check_accessible</span><span class="p">([</span><span class="s1">'_pattern'</span><span class="p">])</span>
+ <span class="k">def</span> <span class="nf">_validate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Validate if there are actual files in the specified glob pattern</span>
+<span class="sd"> """</span>
+ <span class="n">pattern</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_pattern</span><span class="o">.</span><span class="n">get</span><span class="p">()</span>
+
+ <span class="c1"># Limit the responses as we only want to check if something exists</span>
+ <span class="n">match_result</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">match</span><span class="p">([</span><span class="n">pattern</span><span class="p">],</span> <span class="n">limits</span><span class="o">=</span><span class="p">[</span><span class="mi">1</span><span class="p">])[</span><span class="mi">0</span><span class="p">]</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">match_result</span><span class="o">.</span><span class="n">metadata_list</span><span class="p">)</span> <span class="o"><=</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">IOError</span><span class="p">(</span>
+ <span class="s1">'No files found based on the file pattern </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">pattern</span><span class="p">)</span>
+
+<div class="viewcode-block" id="FileBasedSource.split"><a class="viewcode-back" href="../../../apache_beam.io.filebasedsource.html#apache_beam.io.filebasedsource.FileBasedSource.split">[docs]</a> <span class="k">def</span> <span class="nf">split</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span> <span class="n">desired_bundle_size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">start_position</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">stop_position</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_concat_source</span><span class="p">()</span><span class="o">.</span><span class="n">split</span><span class="p">(</span>
+ <span class="n">desired_bundle_size</span><span class="o">=</span><span class="n">desired_bundle_size</span><span class="p">,</span>
+ <span class="n">start_position</span><span class="o">=</span><span class="n">start_position</span><span class="p">,</span>
+ <span class="n">stop_position</span><span class="o">=</span><span class="n">stop_position</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="FileBasedSource.estimate_size"><a class="viewcode-back" href="../../../apache_beam.io.filebasedsource.html#apache_beam.io.filebasedsource.FileBasedSource.estimate_size">[docs]</a> <span class="nd">@check_accessible</span><span class="p">([</span><span class="s1">'_pattern'</span><span class="p">])</span>
+ <span class="k">def</span> <span class="nf">estimate_size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">pattern</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_pattern</span><span class="o">.</span><span class="n">get</span><span class="p">()</span>
+ <span class="n">match_result</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">match</span><span class="p">([</span><span class="n">pattern</span><span class="p">])[</span><span class="mi">0</span><span class="p">]</span>
+ <span class="k">return</span> <span class="nb">sum</span><span class="p">([</span><span class="n">f</span><span class="o">.</span><span class="n">size_in_bytes</span> <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">match_result</span><span class="o">.</span><span class="n">metadata_list</span><span class="p">])</span></div>
+
+<div class="viewcode-block" id="FileBasedSource.read"><a class="viewcode-back" href="../../../apache_beam.io.filebasedsource.html#apache_beam.io.filebasedsource.FileBasedSource.read">[docs]</a> <span class="k">def</span> <span class="nf">read</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">range_tracker</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_concat_source</span><span class="p">()</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="n">range_tracker</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="FileBasedSource.get_range_tracker"><a class="viewcode-back" href="../../../apache_beam.io.filebasedsource.html#apache_beam.io.filebasedsource.FileBasedSource.get_range_tracker">[docs]</a> <span class="k">def</span> <span class="nf">get_range_tracker</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">start_position</span><span class="p">,</span> <span class="n">stop_position</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_concat_source</span><span class="p">()</span><span class="o">.</span><span class="n">get_range_tracker</span><span class="p">(</span><span class="n">start_position</span><span class="p">,</span>
+ <span class="n">stop_position</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="FileBasedSource.read_records"><a class="viewcode-back" href="../../../apache_beam.io.filebasedsource.html#apache_beam.io.filebasedsource.FileBasedSource.read_records">[docs]</a> <span class="k">def</span> <span class="nf">read_records</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file_name</span><span class="p">,</span> <span class="n">offset_range_tracker</span><span class="p">):</span>
+ <span class="sd">"""Returns a generator of records created by reading file 'file_name'.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> file_name: a ``string`` that gives the name of the file to be read. Method</span>
+<span class="sd"> ``FileBasedSource.open_file()`` must be used to open the file</span>
+<span class="sd"> and create a seekable file object.</span>
+<span class="sd"> offset_range_tracker: an object of type ``OffsetRangeTracker``. This</span>
+<span class="sd"> defines the byte range of the file that should be</span>
+<span class="sd"> read. See documentation in</span>
+<span class="sd"> ``iobase.BoundedSource.read()`` for more information</span>
+<span class="sd"> on reading records while complying with the range</span>
+<span class="sd"> defined by a given ``RangeTracker``.</span>
+
+<span class="sd"> Returns:</span>
+<span class="sd"> an iterator that gives the records read from the given file.</span>
+<span class="sd"> """</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">splittable</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_splittable</span></div>
+
+
+<span class="k">def</span> <span class="nf">_determine_splittability_from_compression_type</span><span class="p">(</span>
+ <span class="n">file_path</span><span class="p">,</span> <span class="n">compression_type</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">compression_type</span> <span class="o">==</span> <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">AUTO</span><span class="p">:</span>
+ <span class="n">compression_type</span> <span class="o">=</span> <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">detect_compression_type</span><span class="p">(</span><span class="n">file_path</span><span class="p">)</span>
+
+ <span class="k">return</span> <span class="n">compression_type</span> <span class="o">==</span> <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">UNCOMPRESSED</span>
+
+
+<span class="k">class</span> <span class="nc">_SingleFileSource</span><span class="p">(</span><span class="n">iobase</span><span class="o">.</span><span class="n">BoundedSource</span><span class="p">):</span>
+ <span class="sd">"""Denotes a source for a specific file type."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file_based_source</span><span class="p">,</span> <span class="n">file_name</span><span class="p">,</span> <span class="n">start_offset</span><span class="p">,</span> <span class="n">stop_offset</span><span class="p">,</span>
+ <span class="n">min_bundle_size</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">splittable</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">start_offset</span><span class="p">,</span> <span class="n">integer_types</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
+ <span class="s1">'start_offset must be a number. Received: </span><span class="si">%r</span><span class="s1">'</span> <span class="o">%</span> <span class="n">start_offset</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">stop_offset</span> <span class="o">!=</span> <span class="n">range_trackers</span><span class="o">.</span><span class="n">OffsetRangeTracker</span><span class="o">.</span><span class="n">OFFSET_INFINITY</span><span class="p">:</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">stop_offset</span><span class="p">,</span> <span class="n">integer_types</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
+ <span class="s1">'stop_offset must be a number. Received: </span><span class="si">%r</span><span class="s1">'</span> <span class="o">%</span> <span class="n">stop_offset</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">start_offset</span> <span class="o">>=</span> <span class="n">stop_offset</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
+ <span class="s1">'start_offset must be smaller than stop_offset. Received </span><span class="si">%d</span><span class="s1"> and </span><span class="si">%d</span><span class="s1"> '</span>
+ <span class="s1">'for start and stop offsets respectively'</span> <span class="o">%</span>
+ <span class="p">(</span><span class="n">start_offset</span><span class="p">,</span> <span class="n">stop_offset</span><span class="p">))</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">_file_name</span> <span class="o">=</span> <span class="n">file_name</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_is_gcs_file</span> <span class="o">=</span> <span class="n">file_name</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'gs://'</span><span class="p">)</span> <span class="k">if</span> <span class="n">file_name</span> <span class="k">else</span> <span class="kc">False</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_start_offset</span> <span class="o">=</span> <span class="n">start_offset</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_stop_offset</span> <span class="o">=</span> <span class="n">stop_offset</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_min_bundle_size</span> <span class="o">=</span> <span class="n">min_bundle_size</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_file_based_source</span> <span class="o">=</span> <span class="n">file_based_source</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_splittable</span> <span class="o">=</span> <span class="n">splittable</span>
+
+ <span class="k">def</span> <span class="nf">split</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">desired_bundle_size</span><span class="p">,</span> <span class="n">start_offset</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">stop_offset</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">start_offset</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">start_offset</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_start_offset</span>
+ <span class="k">if</span> <span class="n">stop_offset</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">stop_offset</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_stop_offset</span>
+
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_splittable</span><span class="p">:</span>
+ <span class="n">splits</span> <span class="o">=</span> <span class="n">OffsetRange</span><span class="p">(</span><span class="n">start_offset</span><span class="p">,</span> <span class="n">stop_offset</span><span class="p">)</span><span class="o">.</span><span class="n">split</span><span class="p">(</span>
+ <span class="n">desired_bundle_size</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_bundle_size</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">split</span> <span class="ow">in</span> <span class="n">splits</span><span class="p">:</span>
+ <span class="k">yield</span> <span class="n">iobase</span><span class="o">.</span><span class="n">SourceBundle</span><span class="p">(</span>
+ <span class="n">split</span><span class="o">.</span><span class="n">stop</span> <span class="o">-</span> <span class="n">split</span><span class="o">.</span><span class="n">start</span><span class="p">,</span>
+ <span class="n">_SingleFileSource</span><span class="p">(</span>
+ <span class="c1"># Copying this so that each sub-source gets a fresh instance.</span>
+ <span class="n">pickler</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">pickler</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_file_based_source</span><span class="p">)),</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_file_name</span><span class="p">,</span>
+ <span class="n">split</span><span class="o">.</span><span class="n">start</span><span class="p">,</span>
+ <span class="n">split</span><span class="o">.</span><span class="n">stop</span><span class="p">,</span>
+ <span class="n">min_bundle_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_min_bundle_size</span><span class="p">,</span>
+ <span class="n">splittable</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_splittable</span><span class="p">),</span>
+ <span class="n">split</span><span class="o">.</span><span class="n">start</span><span class="p">,</span>
+ <span class="n">split</span><span class="o">.</span><span class="n">stop</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="c1"># Returning a single sub-source with end offset set to OFFSET_INFINITY (so</span>
+ <span class="c1"># that all data of the source gets read) since this source is</span>
+ <span class="c1"># unsplittable. Choosing size of the file as end offset will be wrong for</span>
+ <span class="c1"># certain unsplittable sources, e.g., compressed sources.</span>
+ <span class="k">yield</span> <span class="n">iobase</span><span class="o">.</span><span class="n">SourceBundle</span><span class="p">(</span>
+ <span class="n">stop_offset</span> <span class="o">-</span> <span class="n">start_offset</span><span class="p">,</span>
+ <span class="n">_SingleFileSource</span><span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_file_based_source</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_file_name</span><span class="p">,</span>
+ <span class="n">start_offset</span><span class="p">,</span>
+ <span class="n">range_trackers</span><span class="o">.</span><span class="n">OffsetRangeTracker</span><span class="o">.</span><span class="n">OFFSET_INFINITY</span><span class="p">,</span>
+ <span class="n">min_bundle_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_min_bundle_size</span><span class="p">,</span>
+ <span class="n">splittable</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_splittable</span>
+ <span class="p">),</span>
+ <span class="n">start_offset</span><span class="p">,</span>
+ <span class="n">range_trackers</span><span class="o">.</span><span class="n">OffsetRangeTracker</span><span class="o">.</span><span class="n">OFFSET_INFINITY</span>
+ <span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">estimate_size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_stop_offset</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">_start_offset</span>
+
+ <span class="k">def</span> <span class="nf">get_range_tracker</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">start_position</span><span class="p">,</span> <span class="n">stop_position</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">start_position</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">start_position</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_start_offset</span>
+ <span class="k">if</span> <span class="n">stop_position</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="c1"># If file is unsplittable we choose OFFSET_INFINITY as the default end</span>
+ <span class="c1"># offset so that all data of the source gets read. Choosing size of the</span>
+ <span class="c1"># file as end offset will be wrong for certain unsplittable sources,</span>
+ <span class="c1"># e.g., compressed sources.</span>
+ <span class="n">stop_position</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_stop_offset</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_splittable</span>
+ <span class="k">else</span> <span class="n">range_trackers</span><span class="o">.</span><span class="n">OffsetRangeTracker</span><span class="o">.</span><span class="n">OFFSET_INFINITY</span><span class="p">)</span>
+
+ <span class="n">range_tracker</span> <span class="o">=</span> <span class="n">range_trackers</span><span class="o">.</span><span class="n">OffsetRangeTracker</span><span class="p">(</span>
+ <span class="n">start_position</span><span class="p">,</span> <span class="n">stop_position</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_splittable</span><span class="p">:</span>
+ <span class="n">range_tracker</span> <span class="o">=</span> <span class="n">range_trackers</span><span class="o">.</span><span class="n">UnsplittableRangeTracker</span><span class="p">(</span><span class="n">range_tracker</span><span class="p">)</span>
+
+ <span class="k">return</span> <span class="n">range_tracker</span>
+
+ <span class="k">def</span> <span class="nf">read</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">range_tracker</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_file_based_source</span><span class="o">.</span><span class="n">read_records</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_file_name</span><span class="p">,</span> <span class="n">range_tracker</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">default_output_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_file_based_source</span><span class="o">.</span><span class="n">default_output_coder</span><span class="p">()</span>
+
+
+<span class="k">class</span> <span class="nc">_ExpandIntoRanges</span><span class="p">(</span><span class="n">DoFn</span><span class="p">):</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span> <span class="n">splittable</span><span class="p">,</span> <span class="n">compression_type</span><span class="p">,</span> <span class="n">desired_bundle_size</span><span class="p">,</span> <span class="n">min_bundle_size</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_desired_bundle_size</span> <span class="o">=</span> <span class="n">desired_bundle_size</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_min_bundle_size</span> <span class="o">=</span> <span class="n">min_bundle_size</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_splittable</span> <span class="o">=</span> <span class="n">splittable</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_compression_type</span> <span class="o">=</span> <span class="n">compression_type</span>
+
+ <span class="k">def</span> <span class="nf">process</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="n">match_results</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">match</span><span class="p">([</span><span class="n">element</span><span class="p">])</span>
+ <span class="k">for</span> <span class="n">metadata</span> <span class="ow">in</span> <span class="n">match_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">metadata_list</span><span class="p">:</span>
+ <span class="n">splittable</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_splittable</span> <span class="ow">and</span>
+ <span class="n">_determine_splittability_from_compression_type</span><span class="p">(</span>
+ <span class="n">metadata</span><span class="o">.</span><span class="n">path</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compression_type</span><span class="p">))</span>
+
+ <span class="k">if</span> <span class="n">splittable</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">split</span> <span class="ow">in</span> <span class="n">OffsetRange</span><span class="p">(</span>
+ <span class="mi">0</span><span class="p">,</span> <span class="n">metadata</span><span class="o">.</span><span class="n">size_in_bytes</span><span class="p">)</span><span class="o">.</span><span class="n">split</span><span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_desired_bundle_size</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_bundle_size</span><span class="p">):</span>
+ <span class="k">yield</span> <span class="p">(</span><span class="n">metadata</span><span class="p">,</span> <span class="n">split</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">yield</span> <span class="p">(</span><span class="n">metadata</span><span class="p">,</span> <span class="n">OffsetRange</span><span class="p">(</span>
+ <span class="mi">0</span><span class="p">,</span> <span class="n">range_trackers</span><span class="o">.</span><span class="n">OffsetRangeTracker</span><span class="o">.</span><span class="n">OFFSET_INFINITY</span><span class="p">))</span>
+
+
+<span class="k">class</span> <span class="nc">_ReadRange</span><span class="p">(</span><span class="n">DoFn</span><span class="p">):</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source_from_file</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_source_from_file</span> <span class="o">=</span> <span class="n">source_from_file</span>
+
+ <span class="k">def</span> <span class="nf">process</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="n">metadata</span><span class="p">,</span> <span class="nb">range</span> <span class="o">=</span> <span class="n">element</span>
+ <span class="n">source</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_source_from_file</span><span class="p">(</span><span class="n">metadata</span><span class="o">.</span><span class="n">path</span><span class="p">)</span>
+ <span class="c1"># Following split() operation has to be performed to create a proper</span>
+ <span class="c1"># _SingleFileSource. Otherwise what we have is a ConcatSource that contains</span>
+ <span class="c1"># a single _SingleFileSource. ConcatSource.read() expects a RangeTracker for</span>
+ <span class="c1"># sub-source range and reads full sub-sources (not byte ranges).</span>
+ <span class="n">source</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">source</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="nb">float</span><span class="p">(</span><span class="s1">'inf'</span><span class="p">)))[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">source</span>
+ <span class="k">for</span> <span class="n">record</span> <span class="ow">in</span> <span class="n">source</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="nb">range</span><span class="o">.</span><span class="n">new_tracker</span><span class="p">()):</span>
+ <span class="k">yield</span> <span class="n">record</span>
+
+
+<span class="k">class</span> <span class="nc">ReadAllFiles</span><span class="p">(</span><span class="n">PTransform</span><span class="p">):</span>
+ <span class="sd">"""A Read transform that reads a PCollection of files.</span>
+
+<span class="sd"> Pipeline authors should not use this directly. This is to be used by Read</span>
+<span class="sd"> PTransform authors who wish to implement file-based Read transforms that</span>
+<span class="sd"> read a PCollection of files.</span>
+<span class="sd"> """</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span> <span class="n">splittable</span><span class="p">,</span> <span class="n">compression_type</span><span class="p">,</span> <span class="n">desired_bundle_size</span><span class="p">,</span> <span class="n">min_bundle_size</span><span class="p">,</span>
+ <span class="n">source_from_file</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Args:</span>
+<span class="sd"> splittable: If False, files won't be split into sub-ranges. If True, files</span>
+<span class="sd"> may or may not be split into data ranges.</span>
+<span class="sd"> compression_type: A ``CompressionTypes`` value that specifies the</span>
+<span class="sd"> compression type of the files that will be processed. If</span>
+<span class="sd"> ``CompressionTypes.AUTO``, system will try to automatically</span>
+<span class="sd"> determine the compression type based on the extension of</span>
+<span class="sd"> files.</span>
+<span class="sd"> desired_bundle_size: the desired size of data ranges that should be</span>
+<span class="sd"> generated when splitting a file into data ranges.</span>
+<span class="sd"> min_bundle_size: minimum size of data ranges that should be generated when</span>
+<span class="sd"> splitting a file into data ranges.</span>
+<span class="sd"> source_from_file: a function that produces a ``BoundedSource`` given a</span>
+<span class="sd"> file name. System will use this function to generate</span>
+<span class="sd"> ``BoundedSource`` objects for file paths. Note that file</span>
+<span class="sd"> paths passed to this will be for individual files, not</span>
+<span class="sd"> for file patterns even if the ``PCollection`` of files</span>
+<span class="sd"> processed by the transform consist of file patterns.</span>
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_splittable</span> <span class="o">=</span> <span class="n">splittable</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_compression_type</span> <span class="o">=</span> <span class="n">compression_type</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_desired_bundle_size</span> <span class="o">=</span> <span class="n">desired_bundle_size</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_min_bundle_size</span> <span class="o">=</span> <span class="n">min_bundle_size</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_source_from_file</span> <span class="o">=</span> <span class="n">source_from_file</span>
+
+ <span class="k">def</span> <span class="nf">expand</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pvalue</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">(</span><span class="n">pvalue</span>
+ <span class="o">|</span> <span class="s1">'ExpandIntoRanges'</span> <span class="o">>></span> <span class="n">ParDo</span><span class="p">(</span><span class="n">_ExpandIntoRanges</span><span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_splittable</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compression_type</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_desired_bundle_size</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_bundle_size</span><span class="p">))</span>
+ <span class="o">|</span> <span class="s1">'Reshard'</span> <span class="o">>></span> <span class="n">Reshuffle</span><span class="p">()</span>
+ <span class="o">|</span> <span class="s1">'ReadRange'</span> <span class="o">>></span> <span class="n">ParDo</span><span class="p">(</span><span class="n">_ReadRange</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_source_from_file</span><span class="p">)))</span>
+</pre></div>
+
+ </div>
+ <div class="articleComments">
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright .
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true,
+ SOURCELINK_SUFFIX: '.txt'
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/filesystem.html b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/filesystem.html
new file mode 100644
index 0000000..5517950
--- /dev/null
+++ b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/filesystem.html
@@ -0,0 +1,824 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>apache_beam.io.filesystem — Apache Beam documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="index" title="Index"
+ href="../../../genindex.html"/>
+ <link rel="search" title="Search" href="../../../search.html"/>
+ <link rel="top" title="Apache Beam documentation" href="../../../index.html"/>
+ <link rel="up" title="Module code" href="../../index.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Apache Beam
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.coders.html">apache_beam.coders package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.internal.html">apache_beam.internal package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.io.html">apache_beam.io package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.metrics.html">apache_beam.metrics package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.options.html">apache_beam.options package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.portability.html">apache_beam.portability package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.runners.html">apache_beam.runners package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.testing.html">apache_beam.testing package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.transforms.html">apache_beam.transforms package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.typehints.html">apache_beam.typehints package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.utils.html">apache_beam.utils package</a></li>
+</ul>
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.error.html">apache_beam.error module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.version.html">apache_beam.version module</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Apache Beam</a>
+
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+
+ <ul class="wy-breadcrumbs">
+
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li>apache_beam.io.filesystem</li>
+
+
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+
+ </ul>
+
+
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for apache_beam.io.filesystem</h1><div class="highlight"><pre>
+<span></span><span class="c1">#</span>
+<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
+<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
+<span class="c1"># this work for additional information regarding copyright ownership.</span>
+<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
+<span class="c1"># (the "License"); you may not use this file except in compliance with</span>
+<span class="c1"># the License. You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="c1">#</span>
+<span class="sd">"""File system abstraction for file-based sources and sinks."""</span>
+
+<span class="kn">from</span> <span class="nn">__future__</span> <span class="k">import</span> <span class="n">absolute_import</span>
+
+<span class="kn">import</span> <span class="nn">abc</span>
+<span class="kn">import</span> <span class="nn">bz2</span>
+<span class="kn">import</span> <span class="nn">cStringIO</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">import</span> <span class="nn">os</span>
+<span class="kn">import</span> <span class="nn">time</span>
+<span class="kn">import</span> <span class="nn">zlib</span>
+
+<span class="kn">from</span> <span class="nn">six</span> <span class="k">import</span> <span class="n">integer_types</span>
+
+<span class="kn">from</span> <span class="nn">apache_beam.utils.plugin</span> <span class="k">import</span> <span class="n">BeamPlugin</span>
+
+<span class="n">logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="vm">__name__</span><span class="p">)</span>
+
+<span class="n">DEFAULT_READ_BUFFER_SIZE</span> <span class="o">=</span> <span class="mi">16</span> <span class="o">*</span> <span class="mi">1024</span> <span class="o">*</span> <span class="mi">1024</span>
+
+<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'CompressionTypes'</span><span class="p">,</span> <span class="s1">'CompressedFile'</span><span class="p">,</span> <span class="s1">'FileMetadata'</span><span class="p">,</span> <span class="s1">'FileSystem'</span><span class="p">,</span>
+ <span class="s1">'MatchResult'</span><span class="p">]</span>
+
+
+<div class="viewcode-block" id="CompressionTypes"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.CompressionTypes">[docs]</a><span class="k">class</span> <span class="nc">CompressionTypes</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">"""Enum-like class representing known compression types."""</span>
+
+ <span class="c1"># Detect compression based on filename extension.</span>
+ <span class="c1">#</span>
+ <span class="c1"># The following extensions are currently recognized by auto-detection:</span>
+ <span class="c1"># .bz2 (implies BZIP2 as described below).</span>
+ <span class="c1"># .gz (implies GZIP as described below)</span>
+ <span class="c1"># Any non-recognized extension implies UNCOMPRESSED as described below.</span>
+ <span class="n">AUTO</span> <span class="o">=</span> <span class="s1">'auto'</span>
+
+ <span class="c1"># BZIP2 compression.</span>
+ <span class="n">BZIP2</span> <span class="o">=</span> <span class="s1">'bzip2'</span>
+
+ <span class="c1"># GZIP compression (deflate with GZIP headers).</span>
+ <span class="n">GZIP</span> <span class="o">=</span> <span class="s1">'gzip'</span>
+
+ <span class="c1"># Uncompressed (i.e., may be split).</span>
+ <span class="n">UNCOMPRESSED</span> <span class="o">=</span> <span class="s1">'uncompressed'</span>
+
+<div class="viewcode-block" id="CompressionTypes.is_valid_compression_type"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.CompressionTypes.is_valid_compression_type">[docs]</a> <span class="nd">@classmethod</span>
+ <span class="k">def</span> <span class="nf">is_valid_compression_type</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">compression_type</span><span class="p">):</span>
+ <span class="sd">"""Returns True for valid compression types, False otherwise."""</span>
+ <span class="n">types</span> <span class="o">=</span> <span class="nb">set</span><span class="p">([</span>
+ <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">AUTO</span><span class="p">,</span>
+ <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">BZIP2</span><span class="p">,</span>
+ <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">GZIP</span><span class="p">,</span>
+ <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">UNCOMPRESSED</span>
+ <span class="p">])</span>
+ <span class="k">return</span> <span class="n">compression_type</span> <span class="ow">in</span> <span class="n">types</span></div>
+
+<div class="viewcode-block" id="CompressionTypes.mime_type"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.CompressionTypes.mime_type">[docs]</a> <span class="nd">@classmethod</span>
+ <span class="k">def</span> <span class="nf">mime_type</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">compression_type</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="s1">'application/octet-stream'</span><span class="p">):</span>
+ <span class="n">mime_types_by_compression_type</span> <span class="o">=</span> <span class="p">{</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">BZIP2</span><span class="p">:</span> <span class="s1">'application/x-bz2'</span><span class="p">,</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">GZIP</span><span class="p">:</span> <span class="s1">'application/x-gzip'</span><span class="p">,</span>
+ <span class="p">}</span>
+ <span class="k">return</span> <span class="n">mime_types_by_compression_type</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">compression_type</span><span class="p">,</span> <span class="n">default</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="CompressionTypes.detect_compression_type"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.CompressionTypes.detect_compression_type">[docs]</a> <span class="nd">@classmethod</span>
+ <span class="k">def</span> <span class="nf">detect_compression_type</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">file_path</span><span class="p">):</span>
+ <span class="sd">"""Returns the compression type of a file (based on its suffix)."""</span>
+ <span class="n">compression_types_by_suffix</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'.bz2'</span><span class="p">:</span> <span class="bp">cls</span><span class="o">.</span><span class="n">BZIP2</span><span class="p">,</span> <span class="s1">'.gz'</span><span class="p">:</span> <span class="bp">cls</span><span class="o">.</span><span class="n">GZIP</span><span class="p">}</span>
+ <span class="n">lowercased_path</span> <span class="o">=</span> <span class="n">file_path</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
+ <span class="k">for</span> <span class="n">suffix</span><span class="p">,</span> <span class="n">compression_type</span> <span class="ow">in</span> <span class="n">compression_types_by_suffix</span><span class="o">.</span><span class="n">iteritems</span><span class="p">():</span>
+ <span class="k">if</span> <span class="n">lowercased_path</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="n">suffix</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">compression_type</span>
+ <span class="k">return</span> <span class="bp">cls</span><span class="o">.</span><span class="n">UNCOMPRESSED</span></div></div>
+
+
+<div class="viewcode-block" id="CompressedFile"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.CompressedFile">[docs]</a><span class="k">class</span> <span class="nc">CompressedFile</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">"""File wrapper for easier handling of compressed files."""</span>
+ <span class="c1"># XXX: This class is not thread safe in the read path.</span>
+
+ <span class="c1"># The bit mask to use for the wbits parameters of the zlib compressor and</span>
+ <span class="c1"># decompressor objects.</span>
+ <span class="n">_gzip_mask</span> <span class="o">=</span> <span class="n">zlib</span><span class="o">.</span><span class="n">MAX_WBITS</span> <span class="o">|</span> <span class="mi">16</span> <span class="c1"># Mask when using GZIP headers.</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
+ <span class="n">fileobj</span><span class="p">,</span>
+ <span class="n">compression_type</span><span class="o">=</span><span class="n">CompressionTypes</span><span class="o">.</span><span class="n">GZIP</span><span class="p">,</span>
+ <span class="n">read_size</span><span class="o">=</span><span class="n">DEFAULT_READ_BUFFER_SIZE</span><span class="p">):</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">fileobj</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'File object must not be None'</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">is_valid_compression_type</span><span class="p">(</span><span class="n">compression_type</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'compression_type must be CompressionType object but '</span>
+ <span class="s1">'was </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">compression_type</span><span class="p">))</span>
+ <span class="k">if</span> <span class="n">compression_type</span> <span class="ow">in</span> <span class="p">(</span><span class="n">CompressionTypes</span><span class="o">.</span><span class="n">AUTO</span><span class="p">,</span> <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">UNCOMPRESSED</span>
+ <span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
+ <span class="s1">'Cannot create object with unspecified or no compression'</span><span class="p">)</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">_file</span> <span class="o">=</span> <span class="n">fileobj</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_compression_type</span> <span class="o">=</span> <span class="n">compression_type</span>
+
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_file</span><span class="o">.</span><span class="n">tell</span><span class="p">()</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'File object must be at position 0 but was </span><span class="si">%d</span><span class="s1">'</span> <span class="o">%</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_file</span><span class="o">.</span><span class="n">tell</span><span class="p">())</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_uncompressed_position</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_uncompressed_size</span> <span class="o">=</span> <span class="kc">None</span>
+
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">readable</span><span class="p">():</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_size</span> <span class="o">=</span> <span class="n">read_size</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_buffer</span> <span class="o">=</span> <span class="n">cStringIO</span><span class="o">.</span><span class="n">StringIO</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_position</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_eof</span> <span class="o">=</span> <span class="kc">False</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_decompressor</span><span class="p">()</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_decompressor</span> <span class="o">=</span> <span class="kc">None</span>
+
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">writeable</span><span class="p">():</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_compressor</span><span class="p">()</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_compressor</span> <span class="o">=</span> <span class="kc">None</span>
+
+ <span class="k">def</span> <span class="nf">_initialize_decompressor</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compression_type</span> <span class="o">==</span> <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">BZIP2</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_decompressor</span> <span class="o">=</span> <span class="n">bz2</span><span class="o">.</span><span class="n">BZ2Decompressor</span><span class="p">()</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compression_type</span> <span class="o">==</span> <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">GZIP</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_decompressor</span> <span class="o">=</span> <span class="n">zlib</span><span class="o">.</span><span class="n">decompressobj</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_gzip_mask</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">_initialize_compressor</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compression_type</span> <span class="o">==</span> <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">BZIP2</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_compressor</span> <span class="o">=</span> <span class="n">bz2</span><span class="o">.</span><span class="n">BZ2Compressor</span><span class="p">()</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compression_type</span> <span class="o">==</span> <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">GZIP</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_compressor</span> <span class="o">=</span> <span class="n">zlib</span><span class="o">.</span><span class="n">compressobj</span><span class="p">(</span><span class="n">zlib</span><span class="o">.</span><span class="n">Z_DEFAULT_COMPRESSION</span><span class="p">,</span>
+ <span class="n">zlib</span><span class="o">.</span><span class="n">DEFLATED</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_gzip_mask</span><span class="p">)</span>
+
+<div class="viewcode-block" id="CompressedFile.readable"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.CompressedFile.readable">[docs]</a> <span class="k">def</span> <span class="nf">readable</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">mode</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_file</span><span class="o">.</span><span class="n">mode</span>
+ <span class="k">return</span> <span class="s1">'r'</span> <span class="ow">in</span> <span class="n">mode</span> <span class="ow">or</span> <span class="s1">'a'</span> <span class="ow">in</span> <span class="n">mode</span></div>
+
+<div class="viewcode-block" id="CompressedFile.writeable"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.CompressedFile.writeable">[docs]</a> <span class="k">def</span> <span class="nf">writeable</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">mode</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_file</span><span class="o">.</span><span class="n">mode</span>
+ <span class="k">return</span> <span class="s1">'w'</span> <span class="ow">in</span> <span class="n">mode</span> <span class="ow">or</span> <span class="s1">'a'</span> <span class="ow">in</span> <span class="n">mode</span></div>
+
+<div class="viewcode-block" id="CompressedFile.write"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.CompressedFile.write">[docs]</a> <span class="k">def</span> <span class="nf">write</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">):</span>
+ <span class="sd">"""Write data to file."""</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compressor</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'compressor not initialized'</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_uncompressed_position</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
+ <span class="n">compressed</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compressor</span><span class="o">.</span><span class="n">compress</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">compressed</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">compressed</span><span class="p">)</span></div>
+
+ <span class="k">def</span> <span class="nf">_fetch_to_internal_buffer</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">num_bytes</span><span class="p">):</span>
+ <span class="sd">"""Fetch up to num_bytes into the internal buffer."""</span>
+ <span class="k">if</span> <span class="p">(</span><span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_read_eof</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_read_position</span> <span class="o">></span> <span class="mi">0</span> <span class="ow">and</span>
+ <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_read_buffer</span><span class="o">.</span><span class="n">tell</span><span class="p">()</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">_read_position</span><span class="p">)</span> <span class="o"><</span> <span class="n">num_bytes</span><span class="p">):</span>
+ <span class="c1"># There aren't enough number of bytes to accommodate a read, so we</span>
+ <span class="c1"># prepare for a possibly large read by clearing up all internal buffers</span>
+ <span class="c1"># but without dropping any previous held data.</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_buffer</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_read_position</span><span class="p">)</span>
+ <span class="n">data</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_read_buffer</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_clear_read_buffer</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_buffer</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
+
+ <span class="k">while</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_read_eof</span> <span class="ow">and</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_read_buffer</span><span class="o">.</span><span class="n">tell</span><span class="p">()</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">_read_position</span>
+ <span class="p">)</span> <span class="o"><</span> <span class="n">num_bytes</span><span class="p">:</span>
+ <span class="c1"># Continue reading from the underlying file object until enough bytes are</span>
+ <span class="c1"># available, or EOF is reached.</span>
+ <span class="n">buf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_file</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_read_size</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">buf</span><span class="p">:</span>
+ <span class="n">decompressed</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_decompressor</span><span class="o">.</span><span class="n">decompress</span><span class="p">(</span><span class="n">buf</span><span class="p">)</span>
+ <span class="k">del</span> <span class="n">buf</span> <span class="c1"># Free up some possibly large and no-longer-needed memory.</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_buffer</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">decompressed</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="c1"># EOF of current stream reached.</span>
+ <span class="c1">#</span>
+ <span class="c1"># Any uncompressed data at the end of the stream of a gzip or bzip2</span>
+ <span class="c1"># file that is not corrupted points to a concatenated compressed</span>
+ <span class="c1"># file. We read concatenated files by recursively creating decompressor</span>
+ <span class="c1"># objects for the unused compressed data.</span>
+ <span class="k">if</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_compression_type</span> <span class="o">==</span> <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">BZIP2</span> <span class="ow">or</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_compression_type</span> <span class="o">==</span> <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">GZIP</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_decompressor</span><span class="o">.</span><span class="n">unused_data</span> <span class="o">!=</span> <span class="s1">''</span><span class="p">:</span>
+ <span class="n">buf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_decompressor</span><span class="o">.</span><span class="n">unused_data</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_decompressor</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="n">bz2</span><span class="o">.</span><span class="n">BZ2Decompressor</span><span class="p">()</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compression_type</span> <span class="o">==</span> <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">BZIP2</span>
+ <span class="k">else</span> <span class="n">zlib</span><span class="o">.</span><span class="n">decompressobj</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_gzip_mask</span><span class="p">))</span>
+ <span class="n">decompressed</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_decompressor</span><span class="o">.</span><span class="n">decompress</span><span class="p">(</span><span class="n">buf</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_buffer</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">decompressed</span><span class="p">)</span>
+ <span class="k">continue</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="c1"># Gzip and bzip2 formats do not require flushing remaining data in the</span>
+ <span class="c1"># decompressor into the read buffer when fully decompressing files.</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_buffer</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_decompressor</span><span class="o">.</span><span class="n">flush</span><span class="p">())</span>
+
+ <span class="c1"># Record that we have hit the end of file, so we won't unnecessarily</span>
+ <span class="c1"># repeat the completeness verification step above.</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_eof</span> <span class="o">=</span> <span class="kc">True</span>
+
+ <span class="k">def</span> <span class="nf">_read_from_internal_buffer</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">read_fn</span><span class="p">):</span>
+ <span class="sd">"""Read from the internal buffer by using the supplied read_fn."""</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_buffer</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_read_position</span><span class="p">)</span>
+ <span class="n">result</span> <span class="o">=</span> <span class="n">read_fn</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_position</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_uncompressed_position</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_buffer</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">SEEK_END</span><span class="p">)</span> <span class="c1"># Allow future writes.</span>
+ <span class="k">return</span> <span class="n">result</span>
+
+<div class="viewcode-block" id="CompressedFile.read"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.CompressedFile.read">[docs]</a> <span class="k">def</span> <span class="nf">read</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">num_bytes</span><span class="p">):</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_decompressor</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'decompressor not initialized'</span><span class="p">)</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">_fetch_to_internal_buffer</span><span class="p">(</span><span class="n">num_bytes</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_read_from_internal_buffer</span><span class="p">(</span>
+ <span class="k">lambda</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">_read_buffer</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="n">num_bytes</span><span class="p">))</span></div>
+
+<div class="viewcode-block" id="CompressedFile.readline"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.CompressedFile.readline">[docs]</a> <span class="k">def</span> <span class="nf">readline</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Equivalent to standard file.readline(). Same return conventions apply."""</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_decompressor</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'decompressor not initialized'</span><span class="p">)</span>
+
+ <span class="n">io</span> <span class="o">=</span> <span class="n">cStringIO</span><span class="o">.</span><span class="n">StringIO</span><span class="p">()</span>
+ <span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
+ <span class="c1"># Ensure that the internal buffer has at least half the read_size. Going</span>
+ <span class="c1"># with half the _read_size (as opposed to a full _read_size) to ensure</span>
+ <span class="c1"># that actual fetches are more evenly spread out, as opposed to having 2</span>
+ <span class="c1"># consecutive reads at the beginning of a read.</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_fetch_to_internal_buffer</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_read_size</span> <span class="o">/</span> <span class="mi">2</span><span class="p">)</span>
+ <span class="n">line</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_read_from_internal_buffer</span><span class="p">(</span>
+ <span class="k">lambda</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">_read_buffer</span><span class="o">.</span><span class="n">readline</span><span class="p">())</span>
+ <span class="n">io</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">line</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">line</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="s1">'</span><span class="se">\n</span><span class="s1">'</span><span class="p">)</span> <span class="ow">or</span> <span class="ow">not</span> <span class="n">line</span><span class="p">:</span>
+ <span class="k">break</span> <span class="c1"># Newline or EOF reached.</span>
+
+ <span class="k">return</span> <span class="n">io</span><span class="o">.</span><span class="n">getvalue</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="CompressedFile.closed"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.CompressedFile.closed">[docs]</a> <span class="k">def</span> <span class="nf">closed</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_file</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">_file</span><span class="o">.</span><span class="n">closed</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="CompressedFile.close"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.CompressedFile.close">[docs]</a> <span class="k">def</span> <span class="nf">close</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">readable</span><span class="p">():</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_buffer</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">writeable</span><span class="p">():</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_compressor</span><span class="o">.</span><span class="n">flush</span><span class="p">())</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">_file</span><span class="o">.</span><span class="n">close</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="CompressedFile.flush"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.CompressedFile.flush">[docs]</a> <span class="k">def</span> <span class="nf">flush</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">writeable</span><span class="p">():</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_compressor</span><span class="o">.</span><span class="n">flush</span><span class="p">())</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_file</span><span class="o">.</span><span class="n">flush</span><span class="p">()</span></div>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">seekable</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s1">'r'</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_file</span><span class="o">.</span><span class="n">mode</span>
+
+ <span class="k">def</span> <span class="nf">_clear_read_buffer</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Clears the read buffer by removing all the contents and</span>
+<span class="sd"> resetting _read_position to 0"""</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_position</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_buffer</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_buffer</span><span class="o">.</span><span class="n">truncate</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">_rewind_file</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Seeks to the beginning of the input file. Input file's EOF marker</span>
+<span class="sd"> is cleared and _uncompressed_position is reset to zero"""</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_file</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">SEEK_SET</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_read_eof</span> <span class="o">=</span> <span class="kc">False</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_uncompressed_position</span> <span class="o">=</span> <span class="mi">0</span>
+
+ <span class="k">def</span> <span class="nf">_rewind</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Seeks to the beginning of the input file and resets the internal read</span>
+<span class="sd"> buffer. The decompressor object is re-initialized to ensure that no data</span>
+<span class="sd"> left in it's buffer."""</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_clear_read_buffer</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_rewind_file</span><span class="p">()</span>
+
+ <span class="c1"># Re-initialize decompressor to clear any data buffered prior to rewind</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_decompressor</span><span class="p">()</span>
+
+<div class="viewcode-block" id="CompressedFile.seek"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.CompressedFile.seek">[docs]</a> <span class="k">def</span> <span class="nf">seek</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">offset</span><span class="p">,</span> <span class="n">whence</span><span class="o">=</span><span class="n">os</span><span class="o">.</span><span class="n">SEEK_ [...]
+ <span class="sd">"""Set the file's current offset.</span>
+
+<span class="sd"> Seeking behavior:</span>
+
+<span class="sd"> * seeking from the end :data:`os.SEEK_END` the whole file is decompressed</span>
+<span class="sd"> once to determine it's size. Therefore it is preferred to use</span>
+<span class="sd"> :data:`os.SEEK_SET` or :data:`os.SEEK_CUR` to avoid the processing</span>
+<span class="sd"> overhead</span>
+<span class="sd"> * seeking backwards from the current position rewinds the file to ``0``</span>
+<span class="sd"> and decompresses the chunks to the requested offset</span>
+<span class="sd"> * seeking is only supported in files opened for reading</span>
+<span class="sd"> * if the new offset is out of bound, it is adjusted to either ``0`` or</span>
+<span class="sd"> ``EOF``.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> offset (int): seek offset in the uncompressed content represented as</span>
+<span class="sd"> number</span>
+<span class="sd"> whence (int): seek mode. Supported modes are :data:`os.SEEK_SET`</span>
+<span class="sd"> (absolute seek), :data:`os.SEEK_CUR` (seek relative to the current</span>
+<span class="sd"> position), and :data:`os.SEEK_END` (seek relative to the end, offset</span>
+<span class="sd"> should be negative).</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ~exceptions.IOError: When this buffer is closed.</span>
+<span class="sd"> ~exceptions.ValueError: When whence is invalid or the file is not seekable</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="n">whence</span> <span class="o">==</span> <span class="n">os</span><span class="o">.</span><span class="n">SEEK_SET</span><span class="p">:</span>
+ <span class="n">absolute_offset</span> <span class="o">=</span> <span class="n">offset</span>
+ <span class="k">elif</span> <span class="n">whence</span> <span class="o">==</span> <span class="n">os</span><span class="o">.</span><span class="n">SEEK_CUR</span><span class="p">:</span>
+ <span class="n">absolute_offset</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_uncompressed_position</span> <span class="o">+</span> <span class="n">offset</span>
+ <span class="k">elif</span> <span class="n">whence</span> <span class="o">==</span> <span class="n">os</span><span class="o">.</span><span class="n">SEEK_END</span><span class="p">:</span>
+ <span class="c1"># Determine and cache the uncompressed size of the file</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_uncompressed_size</span><span class="p">:</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span><span class="s2">"Seeking relative from end of file is requested. "</span>
+ <span class="s2">"Need to decompress the whole file once to determine "</span>
+ <span class="s2">"its size. This might take a while..."</span><span class="p">)</span>
+ <span class="n">uncompress_start_time</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span>
+ <span class="k">while</span> <span class="bp">self</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_read_size</span><span class="p">):</span>
+ <span class="k">pass</span>
+ <span class="n">uncompress_end_time</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span>
+ <span class="n">logger</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span><span class="s2">"Full file decompression for seek from end took </span><span class="si">%.2f</span><span class="s2"> secs"</span><span class="p">,</span>
+ <span class="p">(</span><span class="n">uncompress_end_time</span> <span class="o">-</span> <span class="n">uncompress_start_time</span><span class="p">))</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_uncompressed_size</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_uncompressed_position</span>
+ <span class="n">absolute_offset</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_uncompressed_size</span> <span class="o">+</span> <span class="n">offset</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Whence mode </span><span class="si">%r</span><span class="s2"> is invalid."</span> <span class="o">%</span> <span class="n">whence</span><span class="p">)</span>
+
+ <span class="c1"># Determine how many bytes needs to be read before we reach</span>
+ <span class="c1"># the requested offset. Rewind if we already passed the position.</span>
+ <span class="k">if</span> <span class="n">absolute_offset</span> <span class="o"><</span> <span class="bp">self</span><span class="o">.</span><span class="n">_uncompressed_position</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_rewind</span><span class="p">()</span>
+ <span class="n">bytes_to_skip</span> <span class="o">=</span> <span class="n">absolute_offset</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">_uncompressed_position</span>
+
+ <span class="c1"># Read until the desired position is reached or EOF occurs.</span>
+ <span class="k">while</span> <span class="n">bytes_to_skip</span><span class="p">:</span>
+ <span class="n">data</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="nb">min</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_read_size</span><span class="p">,</span> <span class="n">bytes_to_skip</span><span class="p">))</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">data</span><span class="p">:</span>
+ <span class="k">break</span>
+ <span class="n">bytes_to_skip</span> <span class="o">-=</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="CompressedFile.tell"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.CompressedFile.tell">[docs]</a> <span class="k">def</span> <span class="nf">tell</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Returns current position in uncompressed file."""</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_uncompressed_position</span></div>
+
+ <span class="k">def</span> <span class="nf">__enter__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span>
+
+ <span class="k">def</span> <span class="nf">__exit__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">exception_type</span><span class="p">,</span> <span class="n">exception_value</span><span class="p">,</span> <span class="n">traceback</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">close</span><span class="p">()</span></div>
+
+
+<div class="viewcode-block" id="FileMetadata"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.FileMetadata">[docs]</a><span class="k">class</span> <span class="nc">FileMetadata</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">"""Metadata about a file path that is the output of FileSystem.match</span>
+<span class="sd"> """</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="n">size_in_bytes</span><span class="p">):</span>
+ <span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">basestring</span><span class="p">)</span> <span class="ow">and</span> <span class="n">path</span><span class="p">,</span> <span class="s2">"Path should be a string"</span>
+ <span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">size_in_bytes</span><span class="p">,</span> <span class="n">integer_types</span><span class="p">)</span> <span class="ow">and</span> <span class="n">size_in_bytes</span> <span class="o">>=</span> <span class="mi">0</span><span class="p">,</span> \
+ <span class="s2">"Invalid value for size_in_bytes should </span><span class="si">%s</span><span class="s2"> (of type </span><span class="si">%s</span><span class="s2">)"</span> <span class="o">%</span> <span class="p">(</span>
+ <span class="n">size_in_bytes</span><span class="p">,</span> <span class="nb">type</span><span class="p">(</span><span class="n">size_in_bytes</span><span class="p">))</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">path</span> <span class="o">=</span> <span class="n">path</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">size_in_bytes</span> <span class="o">=</span> <span class="n">size_in_bytes</span>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="sd">"""Note: This is only used in tests where we verify that mock objects match.</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">FileMetadata</span><span class="p">)</span> <span class="ow">and</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">path</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">path</span> <span class="ow">and</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">size_in_bytes</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">size_in_bytes</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">hash</span><span class="p">((</span><span class="bp">self</span><span class="o">.</span><span class="n">path</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">size_in_bytes</span><span class="p">))</span>
+
+ <span class="k">def</span> <span class="nf">__ne__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="fm">__eq__</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s1">'FileMetadata(</span><span class="si">%s</span><span class="s1">, </span><span class="si">%s</span><span class="s1">)'</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">path</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">size_in_bytes</span><span class="p">)</span></div>
+
+
+<div class="viewcode-block" id="MatchResult"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.MatchResult">[docs]</a><span class="k">class</span> <span class="nc">MatchResult</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">"""Result from the ``FileSystem`` match operation which contains the list</span>
+<span class="sd"> of matched FileMetadata.</span>
+<span class="sd"> """</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pattern</span><span class="p">,</span> <span class="n">metadata_list</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">metadata_list</span> <span class="o">=</span> <span class="n">metadata_list</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">pattern</span> <span class="o">=</span> <span class="n">pattern</span></div>
+
+
+<span class="k">class</span> <span class="nc">BeamIOError</span><span class="p">(</span><span class="ne">IOError</span><span class="p">):</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">msg</span><span class="p">,</span> <span class="n">exception_details</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+ <span class="sd">"""Class representing the errors thrown in the batch file operations.</span>
+<span class="sd"> Args:</span>
+<span class="sd"> msg: Message string for the exception thrown</span>
+<span class="sd"> exception_details: Optional map of individual input to exception for</span>
+<span class="sd"> failed operations in batch. This parameter is optional so if specified</span>
+<span class="sd"> the user can assume that the all errors in the filesystem operation</span>
+<span class="sd"> have been reported. When the details are missing then the operation</span>
+<span class="sd"> may have failed anywhere so the user should use match to determine</span>
+<span class="sd"> the current state of the system.</span>
+<span class="sd"> """</span>
+ <span class="n">message</span> <span class="o">=</span> <span class="s2">"</span><span class="si">%s</span><span class="s2"> with exceptions </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">msg</span><span class="p">,</span> <span class="n">exception_details</span><span class="p">)</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">BeamIOError</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">message</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">exception_details</span> <span class="o">=</span> <span class="n">exception_details</span>
+
+
+<div class="viewcode-block" id="FileSystem"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.FileSystem">[docs]</a><span class="k">class</span> <span class="nc">FileSystem</span><span class="p">(</span><span class="n">BeamPlugin</span><span class="p">):</span>
+ <span class="sd">"""A class that defines the functions that can be performed on a filesystem.</span>
+
+<span class="sd"> All methods are abstract and they are for file system providers to</span>
+<span class="sd"> implement. Clients should use the FileSystems class to interact with</span>
+<span class="sd"> the correct file system based on the provided file pattern scheme.</span>
+<span class="sd"> """</span>
+ <span class="n">__metaclass__</span> <span class="o">=</span> <span class="n">abc</span><span class="o">.</span><span class="n">ABCMeta</span>
+ <span class="n">CHUNK_SIZE</span> <span class="o">=</span> <span class="mi">1</span> <span class="c1"># Chuck size in the batch operations</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pipeline_options</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Args:</span>
+<span class="sd"> pipeline_options: Instance of ``PipelineOptions``.</span>
+<span class="sd"> """</span>
+
+ <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">_get_compression_type</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">compression_type</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">compression_type</span> <span class="o">==</span> <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">AUTO</span><span class="p">:</span>
+ <span class="n">compression_type</span> <span class="o">=</span> <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">detect_compression_type</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="ow">not</span> <span class="n">CompressionTypes</span><span class="o">.</span><span class="n">is_valid_compression_type</span><span class="p">(</span><span class="n">compression_type</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'compression_type must be CompressionType object but '</span>
+ <span class="s1">'was </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">compression_type</span><span class="p">))</span>
+ <span class="k">return</span> <span class="n">compression_type</span>
+
+<div class="viewcode-block" id="FileSystem.scheme"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.FileSystem.scheme">[docs]</a> <span class="nd">@classmethod</span>
+ <span class="k">def</span> <span class="nf">scheme</span><span class="p">(</span><span class="bp">cls</span><span class="p">):</span>
+ <span class="sd">"""URI scheme for the FileSystem</span>
+<span class="sd"> """</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
+
+<div class="viewcode-block" id="FileSystem.join"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.FileSystem.join">[docs]</a> <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractmethod</span>
+ <span class="k">def</span> <span class="nf">join</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">basepath</span><span class="p">,</span> <span class="o">*</span><span class="n">paths</span><span class="p">):</span>
+ <span class="sd">"""Join two or more pathname components for the filesystem</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> basepath: string path of the first component of the path</span>
+<span class="sd"> paths: path components to be added</span>
+
+<span class="sd"> Returns: full path after combining all the passed components</span>
+<span class="sd"> """</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
+
+<div class="viewcode-block" id="FileSystem.split"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.FileSystem.split">[docs]</a> <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractmethod</span>
+ <span class="k">def</span> <span class="nf">split</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
+ <span class="sd">"""Splits the given path into two parts.</span>
+
+<span class="sd"> Splits the path into a pair (head, tail) such that tail contains the last</span>
+<span class="sd"> component of the path and head contains everything up to that.</span>
+
+<span class="sd"> For file-systems other than the local file-system, head should include the</span>
+<span class="sd"> prefix.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> path: path as a string</span>
+<span class="sd"> Returns:</span>
+<span class="sd"> a pair of path components as strings.</span>
+<span class="sd"> """</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
+
+<div class="viewcode-block" id="FileSystem.mkdirs"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.FileSystem.mkdirs">[docs]</a> <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractmethod</span>
+ <span class="k">def</span> <span class="nf">mkdirs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
+ <span class="sd">"""Recursively create directories for the provided path.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> path: string path of the directory structure that should be created</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> IOError if leaf directory already exists.</span>
+<span class="sd"> """</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
+
+<div class="viewcode-block" id="FileSystem.match"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.FileSystem.match">[docs]</a> <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractmethod</span>
+ <span class="k">def</span> <span class="nf">match</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">patterns</span><span class="p">,</span> <span class="n">limits</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+ <span class="sd">"""Find all matching paths to the patterns provided.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> patterns: list of strings, each a file path pattern to match against</span>
+<span class="sd"> limits: list of maximum number of responses that need to be fetched</span>
+
+<span class="sd"> Returns: list of ``MatchResult`` objects.</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ``BeamIOError`` if any of the pattern match operations fail</span>
+<span class="sd"> """</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
+
+<div class="viewcode-block" id="FileSystem.create"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.FileSystem.create">[docs]</a> <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractmethod</span>
+ <span class="k">def</span> <span class="nf">create</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="n">mime_type</span><span class="o">=</span><span class="s1">'application/octet-stream'</span><span class="p">,</span>
+ <span class="n">compression_type</span><span class="o">=</span><span class="n">CompressionTypes</span><span class="o">.</span><span class="n">AUTO</span><span class="p">):</span>
+ <span class="sd">"""Returns a write channel for the given file path.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> path: string path of the file object to be written to the system</span>
+<span class="sd"> mime_type: MIME type to specify the type of content in the file object</span>
+<span class="sd"> compression_type: Type of compression to be used for this object</span>
+
+<span class="sd"> Returns: file handle with a close function for the user to use</span>
+<span class="sd"> """</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
+
+<div class="viewcode-block" id="FileSystem.open"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.FileSystem.open">[docs]</a> <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractmethod</span>
+ <span class="k">def</span> <span class="nf">open</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="n">mime_type</span><span class="o">=</span><span class="s1">'application/octet-stream'</span><span class="p">,</span>
+ <span class="n">compression_type</span><span class="o">=</span><span class="n">CompressionTypes</span><span class="o">.</span><span class="n">AUTO</span><span class="p">):</span>
+ <span class="sd">"""Returns a read channel for the given file path.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> path: string path of the file object to be read</span>
+<span class="sd"> mime_type: MIME type to specify the type of content in the file object</span>
+<span class="sd"> compression_type: Type of compression to be used for this object</span>
+
+<span class="sd"> Returns: file handle with a close function for the user to use</span>
+<span class="sd"> """</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
+
+<div class="viewcode-block" id="FileSystem.copy"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.FileSystem.copy">[docs]</a> <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractmethod</span>
+ <span class="k">def</span> <span class="nf">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source_file_names</span><span class="p">,</span> <span class="n">destination_file_names</span><span class="p">):</span>
+ <span class="sd">"""Recursively copy the file tree from the source to the destination</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> source_file_names: list of source file objects that need to be copied</span>
+<span class="sd"> destination_file_names: list of destinations for the new objects</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ``BeamIOError`` if any of the copy operations fail</span>
+<span class="sd"> """</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
+
+<div class="viewcode-block" id="FileSystem.rename"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.FileSystem.rename">[docs]</a> <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractmethod</span>
+ <span class="k">def</span> <span class="nf">rename</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source_file_names</span><span class="p">,</span> <span class="n">destination_file_names</span><span class="p">):</span>
+ <span class="sd">"""Rename the files at the source list to the destination list.</span>
+<span class="sd"> Source and destination lists should be of the same size.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> source_file_names: List of file paths that need to be moved</span>
+<span class="sd"> destination_file_names: List of destination_file_names for the files</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ``BeamIOError`` if any of the rename operations fail</span>
+<span class="sd"> """</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
+
+<div class="viewcode-block" id="FileSystem.exists"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.FileSystem.exists">[docs]</a> <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractmethod</span>
+ <span class="k">def</span> <span class="nf">exists</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
+ <span class="sd">"""Check if the provided path exists on the FileSystem.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> path: string path that needs to be checked.</span>
+
+<span class="sd"> Returns: boolean flag indicating if path exists</span>
+<span class="sd"> """</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
+
+<div class="viewcode-block" id="FileSystem.delete"><a class="viewcode-back" href="../../../apache_beam.io.filesystem.html#apache_beam.io.filesystem.FileSystem.delete">[docs]</a> <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractmethod</span>
+ <span class="k">def</span> <span class="nf">delete</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">paths</span><span class="p">):</span>
+ <span class="sd">"""Deletes files or directories at the provided paths.</span>
+<span class="sd"> Directories will be deleted recursively.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> paths: list of paths that give the file objects to be deleted</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ``BeamIOError`` if any of the delete operations fail</span>
+<span class="sd"> """</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span></div></div>
+</pre></div>
+
+ </div>
+ <div class="articleComments">
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright .
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true,
+ SOURCELINK_SUFFIX: '.txt'
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/filesystemio.html b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/filesystemio.html
new file mode 100644
index 0000000..8439414
--- /dev/null
+++ b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/filesystemio.html
@@ -0,0 +1,501 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>apache_beam.io.filesystemio — Apache Beam documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="index" title="Index"
+ href="../../../genindex.html"/>
+ <link rel="search" title="Search" href="../../../search.html"/>
+ <link rel="top" title="Apache Beam documentation" href="../../../index.html"/>
+ <link rel="up" title="Module code" href="../../index.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Apache Beam
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.coders.html">apache_beam.coders package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.internal.html">apache_beam.internal package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.io.html">apache_beam.io package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.metrics.html">apache_beam.metrics package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.options.html">apache_beam.options package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.portability.html">apache_beam.portability package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.runners.html">apache_beam.runners package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.testing.html">apache_beam.testing package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.transforms.html">apache_beam.transforms package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.typehints.html">apache_beam.typehints package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.utils.html">apache_beam.utils package</a></li>
+</ul>
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.error.html">apache_beam.error module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.version.html">apache_beam.version module</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Apache Beam</a>
+
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+
+ <ul class="wy-breadcrumbs">
+
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li>apache_beam.io.filesystemio</li>
+
+
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+
+ </ul>
+
+
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for apache_beam.io.filesystemio</h1><div class="highlight"><pre>
+<span></span><span class="c1">#</span>
+<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
+<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
+<span class="c1"># this work for additional information regarding copyright ownership.</span>
+<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
+<span class="c1"># (the "License"); you may not use this file except in compliance with</span>
+<span class="c1"># the License. You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="c1">#</span>
+<span class="sd">"""Utilities for ``FileSystem`` implementations."""</span>
+
+<span class="kn">import</span> <span class="nn">abc</span>
+<span class="kn">import</span> <span class="nn">io</span>
+<span class="kn">import</span> <span class="nn">os</span>
+
+<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'Downloader'</span><span class="p">,</span> <span class="s1">'Uploader'</span><span class="p">,</span> <span class="s1">'DownloaderStream'</span><span class="p">,</span> <span class="s1">'UploaderStream'</span><span class="p">,</span>
+ <span class="s1">'PipeStream'</span><span class="p">]</span>
+
+
+<div class="viewcode-block" id="Downloader"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.Downloader">[docs]</a><span class="k">class</span> <span class="nc">Downloader</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">"""Download interface for a single file.</span>
+
+<span class="sd"> Implementations should support random access reads.</span>
+<span class="sd"> """</span>
+
+ <span class="n">__metaclass__</span> <span class="o">=</span> <span class="n">abc</span><span class="o">.</span><span class="n">ABCMeta</span>
+
+ <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractproperty</span>
+ <span class="k">def</span> <span class="nf">size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Size of file to download."""</span>
+
+<div class="viewcode-block" id="Downloader.get_range"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.Downloader.get_range">[docs]</a> <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractmethod</span>
+ <span class="k">def</span> <span class="nf">get_range</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">start</span><span class="p">,</span> <span class="n">end</span><span class="p">):</span>
+ <span class="sd">"""Retrieve a given byte range [start, end) from this download.</span>
+
+<span class="sd"> Range must be in this form:</span>
+<span class="sd"> 0 <= start < end: Fetch the bytes from start to end.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> start: (int) Initial byte offset.</span>
+<span class="sd"> end: (int) Final byte offset, exclusive.</span>
+
+<span class="sd"> Returns:</span>
+<span class="sd"> (string) A buffer containing the requested data.</span>
+<span class="sd"> """</span></div></div>
+
+
+<div class="viewcode-block" id="Uploader"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.Uploader">[docs]</a><span class="k">class</span> <span class="nc">Uploader</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">"""Upload interface for a single file."""</span>
+
+ <span class="n">__metaclass__</span> <span class="o">=</span> <span class="n">abc</span><span class="o">.</span><span class="n">ABCMeta</span>
+
+<div class="viewcode-block" id="Uploader.put"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.Uploader.put">[docs]</a> <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractmethod</span>
+ <span class="k">def</span> <span class="nf">put</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">):</span>
+ <span class="sd">"""Write data to file sequentially.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> data: (memoryview) Data to write.</span>
+<span class="sd"> """</span></div>
+
+<div class="viewcode-block" id="Uploader.finish"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.Uploader.finish">[docs]</a> <span class="nd">@abc</span><span class="o">.</span><span class="n">abstractmethod</span>
+ <span class="k">def</span> <span class="nf">finish</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Signal to upload any remaining data and close the file.</span>
+
+<span class="sd"> File should be fully written upon return from this method.</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> Any error encountered during the upload.</span>
+<span class="sd"> """</span></div></div>
+
+
+<div class="viewcode-block" id="DownloaderStream"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.DownloaderStream">[docs]</a><span class="k">class</span> <span class="nc">DownloaderStream</span><span class="p">(</span><span class="n">io</span><span class="o">.</span><span class="n">RawIOBase</span><span class="p">):</span>
+ <span class="sd">"""Provides a stream interface for Downloader objects."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">downloader</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s1">'r'</span><span class="p">):</span>
+ <span class="sd">"""Initializes the stream.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> downloader: (Downloader) Filesystem dependent implementation.</span>
+<span class="sd"> mode: (string) Python mode attribute for this stream.</span>
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_downloader</span> <span class="o">=</span> <span class="n">downloader</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">mode</span> <span class="o">=</span> <span class="n">mode</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_position</span> <span class="o">=</span> <span class="mi">0</span>
+
+<div class="viewcode-block" id="DownloaderStream.readinto"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.DownloaderStream.readinto">[docs]</a> <span class="k">def</span> <span class="nf">readinto</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">b</span><span class="p">):</span>
+ <span class="sd">"""Read up to len(b) bytes into b.</span>
+
+<span class="sd"> Returns number of bytes read (0 for EOF).</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> b: (bytearray/memoryview) Buffer to read into.</span>
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_checkClosed</span><span class="p">()</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_position</span> <span class="o">>=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_downloader</span><span class="o">.</span><span class="n">size</span><span class="p">:</span>
+ <span class="k">return</span> <span class="mi">0</span>
+
+ <span class="n">start</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_position</span>
+ <span class="n">end</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_position</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">b</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">_downloader</span><span class="o">.</span><span class="n">size</span><span class="p">)</span>
+ <span class="n">data</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_downloader</span><span class="o">.</span><span class="n">get_range</span><span class="p">(</span><span class="n">start</span><span class="p">,</span> <span class="n">end</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_position</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
+ <span class="n">b</span><span class="p">[:</span><span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">)]</span> <span class="o">=</span> <span class="n">data</span>
+ <span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="DownloaderStream.seek"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.DownloaderStream.seek">[docs]</a> <span class="k">def</span> <span class="nf">seek</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">offset</span><span class="p">,</span> <span class="n">whence</span><span class="o">=</span><span class="n">os</span><span class="o">.</span><span class=" [...]
+ <span class="sd">"""Set the stream's current offset.</span>
+
+<span class="sd"> Note that if the new offset is out of bounds, it is adjusted to either 0 or EOF.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> offset: seek offset as number.</span>
+<span class="sd"> whence: seek mode. Supported modes are os.SEEK_SET (absolute seek),</span>
+<span class="sd"> os.SEEK_CUR (seek relative to the current position), and os.SEEK_END</span>
+<span class="sd"> (seek relative to the end, offset should be negative).</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ``ValueError``: When this stream is closed or if whence is invalid.</span>
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_checkClosed</span><span class="p">()</span>
+
+ <span class="k">if</span> <span class="n">whence</span> <span class="o">==</span> <span class="n">os</span><span class="o">.</span><span class="n">SEEK_SET</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_position</span> <span class="o">=</span> <span class="n">offset</span>
+ <span class="k">elif</span> <span class="n">whence</span> <span class="o">==</span> <span class="n">os</span><span class="o">.</span><span class="n">SEEK_CUR</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_position</span> <span class="o">+=</span> <span class="n">offset</span>
+ <span class="k">elif</span> <span class="n">whence</span> <span class="o">==</span> <span class="n">os</span><span class="o">.</span><span class="n">SEEK_END</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_position</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_downloader</span><span class="o">.</span><span class="n">size</span> <span class="o">+</span> <span class="n">offset</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Whence mode </span><span class="si">%r</span><span class="s1"> is invalid.'</span> <span class="o">%</span> <span class="n">whence</span><span class="p">)</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">_position</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_position</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_downloader</span><span class="o">.</span><span class="n">size</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_position</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_position</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_position</span></div>
+
+<div class="viewcode-block" id="DownloaderStream.tell"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.DownloaderStream.tell">[docs]</a> <span class="k">def</span> <span class="nf">tell</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Tell the stream's current offset.</span>
+
+<span class="sd"> Returns:</span>
+<span class="sd"> current offset in reading this stream.</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ``ValueError``: When this stream is closed.</span>
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_checkClosed</span><span class="p">()</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_position</span></div>
+
+<div class="viewcode-block" id="DownloaderStream.seekable"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.DownloaderStream.seekable">[docs]</a> <span class="k">def</span> <span class="nf">seekable</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">True</span></div>
+
+<div class="viewcode-block" id="DownloaderStream.readable"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.DownloaderStream.readable">[docs]</a> <span class="k">def</span> <span class="nf">readable</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">True</span></div></div>
+
+
+<div class="viewcode-block" id="UploaderStream"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.UploaderStream">[docs]</a><span class="k">class</span> <span class="nc">UploaderStream</span><span class="p">(</span><span class="n">io</span><span class="o">.</span><span class="n">RawIOBase</span><span class="p">):</span>
+ <span class="sd">"""Provides a stream interface for Uploader objects."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">uploader</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s1">'w'</span><span class="p">):</span>
+ <span class="sd">"""Initializes the stream.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> uploader: (Uploader) Filesystem dependent implementation.</span>
+<span class="sd"> mode: (string) Python mode attribute for this stream.</span>
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_uploader</span> <span class="o">=</span> <span class="n">uploader</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">mode</span> <span class="o">=</span> <span class="n">mode</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_position</span> <span class="o">=</span> <span class="mi">0</span>
+
+<div class="viewcode-block" id="UploaderStream.tell"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.UploaderStream.tell">[docs]</a> <span class="k">def</span> <span class="nf">tell</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_position</span></div>
+
+<div class="viewcode-block" id="UploaderStream.write"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.UploaderStream.write">[docs]</a> <span class="k">def</span> <span class="nf">write</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">b</span><span class="p">):</span>
+ <span class="sd">"""Write bytes from b.</span>
+
+<span class="sd"> Returns number of bytes written (<= len(b)).</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> b: (memoryview) Buffer with data to write.</span>
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_checkClosed</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_uploader</span><span class="o">.</span><span class="n">put</span><span class="p">(</span><span class="n">b</span><span class="p">)</span>
+
+ <span class="n">bytes_written</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">b</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_position</span> <span class="o">+=</span> <span class="n">bytes_written</span>
+ <span class="k">return</span> <span class="n">bytes_written</span></div>
+
+<div class="viewcode-block" id="UploaderStream.close"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.UploaderStream.close">[docs]</a> <span class="k">def</span> <span class="nf">close</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Complete the upload and close this stream.</span>
+
+<span class="sd"> This method has no effect if the stream is already closed.</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> Any error encountered by the uploader.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">closed</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_uploader</span><span class="o">.</span><span class="n">finish</span><span class="p">()</span>
+
+ <span class="nb">super</span><span class="p">(</span><span class="n">UploaderStream</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">close</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="UploaderStream.writable"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.UploaderStream.writable">[docs]</a> <span class="k">def</span> <span class="nf">writable</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="kc">True</span></div></div>
+
+
+<div class="viewcode-block" id="PipeStream"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.PipeStream">[docs]</a><span class="k">class</span> <span class="nc">PipeStream</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">"""A class that presents a pipe connection as a readable stream."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">recv_pipe</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">conn</span> <span class="o">=</span> <span class="n">recv_pipe</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">closed</span> <span class="o">=</span> <span class="kc">False</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">position</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">remaining</span> <span class="o">=</span> <span class="s1">''</span>
+
+<div class="viewcode-block" id="PipeStream.read"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.PipeStream.read">[docs]</a> <span class="k">def</span> <span class="nf">read</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="p">):</span>
+ <span class="sd">"""Read data from the wrapped pipe connection.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> size: Number of bytes to read. Actual number of bytes read is always</span>
+<span class="sd"> equal to size unless EOF is reached.</span>
+
+<span class="sd"> Returns:</span>
+<span class="sd"> data read as str.</span>
+<span class="sd"> """</span>
+ <span class="n">data_list</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="n">bytes_read</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">while</span> <span class="n">bytes_read</span> <span class="o"><</span> <span class="n">size</span><span class="p">:</span>
+ <span class="n">bytes_from_remaining</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="n">size</span> <span class="o">-</span> <span class="n">bytes_read</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">remaining</span><span class="p">))</span>
+ <span class="n">data_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">remaining</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="n">bytes_from_remaining</span><span class="p">])</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">remaining</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">remaining</span><span class="p">[</span><span class="n">bytes_from_remaining</span><span class="p">:]</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">position</span> <span class="o">+=</span> <span class="n">bytes_from_remaining</span>
+ <span class="n">bytes_read</span> <span class="o">+=</span> <span class="n">bytes_from_remaining</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">remaining</span><span class="p">:</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">remaining</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn</span><span class="o">.</span><span class="n">recv_bytes</span><span class="p">()</span>
+ <span class="k">except</span> <span class="ne">EOFError</span><span class="p">:</span>
+ <span class="k">break</span>
+ <span class="k">return</span> <span class="s1">''</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data_list</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="PipeStream.tell"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.PipeStream.tell">[docs]</a> <span class="k">def</span> <span class="nf">tell</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Tell the file's current offset.</span>
+
+<span class="sd"> Returns:</span>
+<span class="sd"> current offset in reading this file.</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd">      ``IOError``: When this stream is closed.</span>
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_check_open</span><span class="p">()</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">position</span></div>
+
+<div class="viewcode-block" id="PipeStream.seek"><a class="viewcode-back" href="../../../apache_beam.io.filesystemio.html#apache_beam.io.filesystemio.PipeStream.seek">[docs]</a> <span class="k">def</span> <span class="nf">seek</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">offset</span><span class="p">,</span> <span class="n">whence</span><span class="o">=</span><span class="n">os</span><span class="o">.</span><span class="n">SEEK_SET< [...]
+ <span class="c1"># The apitools library used by the gcsio.Uploader class insists on seeking</span>
+ <span class="c1"># to the end of a stream to do a check before completing an upload, so we</span>
+ <span class="c1"># must have this no-op method here in that case.</span>
+ <span class="k">if</span> <span class="n">whence</span> <span class="o">==</span> <span class="n">os</span><span class="o">.</span><span class="n">SEEK_END</span> <span class="ow">and</span> <span class="n">offset</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">return</span>
+ <span class="k">elif</span> <span class="n">whence</span> <span class="o">==</span> <span class="n">os</span><span class="o">.</span><span class="n">SEEK_SET</span> <span class="ow">and</span> <span class="n">offset</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">position</span><span class="p">:</span>
+ <span class="k">return</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
+
+ <span class="k">def</span> <span class="nf">_check_open</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">closed</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">IOError</span><span class="p">(</span><span class="s1">'Stream is closed.'</span><span class="p">)</span></div>
+</pre></div>
+
+ </div>
+ <div class="articleComments">
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright .
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true,
+ SOURCELINK_SUFFIX: '.txt'
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/filesystems.html b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/filesystems.html
new file mode 100644
index 0000000..f80c825
--- /dev/null
+++ b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/filesystems.html
@@ -0,0 +1,493 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>apache_beam.io.filesystems — Apache Beam documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="index" title="Index"
+ href="../../../genindex.html"/>
+ <link rel="search" title="Search" href="../../../search.html"/>
+ <link rel="top" title="Apache Beam documentation" href="../../../index.html"/>
+ <link rel="up" title="Module code" href="../../index.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Apache Beam
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.coders.html">apache_beam.coders package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.internal.html">apache_beam.internal package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.io.html">apache_beam.io package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.metrics.html">apache_beam.metrics package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.options.html">apache_beam.options package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.portability.html">apache_beam.portability package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.runners.html">apache_beam.runners package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.testing.html">apache_beam.testing package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.transforms.html">apache_beam.transforms package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.typehints.html">apache_beam.typehints package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.utils.html">apache_beam.utils package</a></li>
+</ul>
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.error.html">apache_beam.error module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.version.html">apache_beam.version module</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Apache Beam</a>
+
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+
+ <ul class="wy-breadcrumbs">
+
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li>apache_beam.io.filesystems</li>
+
+
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+
+ </ul>
+
+
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for apache_beam.io.filesystems</h1><div class="highlight"><pre>
+<span></span><span class="c1">#</span>
+<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
+<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
+<span class="c1"># this work for additional information regarding copyright ownership.</span>
+<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
+<span class="c1"># (the "License"); you may not use this file except in compliance with</span>
+<span class="c1"># the License. You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="c1">#</span>
+
+<span class="sd">"""FileSystems interface class for accessing the correct filesystem"""</span>
+
+<span class="kn">import</span> <span class="nn">re</span>
+
+<span class="kn">from</span> <span class="nn">apache_beam.io.filesystem</span> <span class="k">import</span> <span class="n">BeamIOError</span>
+<span class="kn">from</span> <span class="nn">apache_beam.io.filesystem</span> <span class="k">import</span> <span class="n">CompressionTypes</span>
+<span class="kn">from</span> <span class="nn">apache_beam.io.filesystem</span> <span class="k">import</span> <span class="n">FileSystem</span>
+
+<span class="c1"># All filesystem implementations should be added here as</span>
+<span class="c1"># best effort imports. We don't want to force loading</span>
+<span class="c1"># a module if the user doesn't supply the correct</span>
+<span class="c1"># packages that these filesystems rely on.</span>
+<span class="c1">#</span>
+<span class="c1"># pylint: disable=wrong-import-position, unused-import</span>
+<span class="k">try</span><span class="p">:</span>
+ <span class="kn">from</span> <span class="nn">apache_beam.io.hadoopfilesystem</span> <span class="k">import</span> <span class="n">HadoopFileSystem</span>
+<span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
+ <span class="k">pass</span>
+
+<span class="k">try</span><span class="p">:</span>
+ <span class="kn">from</span> <span class="nn">apache_beam.io.localfilesystem</span> <span class="k">import</span> <span class="n">LocalFileSystem</span>
+<span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
+ <span class="k">pass</span>
+
+<span class="k">try</span><span class="p">:</span>
+ <span class="kn">from</span> <span class="nn">apache_beam.io.gcp.gcsfilesystem</span> <span class="k">import</span> <span class="n">GCSFileSystem</span>
+<span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
+ <span class="k">pass</span>
+<span class="c1"># pylint: enable=wrong-import-position, unused-import</span>
+
+<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'FileSystems'</span><span class="p">]</span>
+
+
+<div class="viewcode-block" id="FileSystems"><a class="viewcode-back" href="../../../apache_beam.io.filesystems.html#apache_beam.io.filesystems.FileSystems">[docs]</a><span class="k">class</span> <span class="nc">FileSystems</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">"""A class that defines the functions that can be performed on a filesystem.</span>
+<span class="sd"> All methods are static and access the underlying registered filesystems.</span>
+<span class="sd"> """</span>
+ <span class="n">URI_SCHEMA_PATTERN</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">'(?P<scheme>[a-zA-Z][-a-zA-Z0-9+.]*)://.*'</span><span class="p">)</span>
+
+ <span class="n">_pipeline_options</span> <span class="o">=</span> <span class="kc">None</span>
+
+<div class="viewcode-block" id="FileSystems.set_options"><a class="viewcode-back" href="../../../apache_beam.io.filesystems.html#apache_beam.io.filesystems.FileSystems.set_options">[docs]</a> <span class="nd">@classmethod</span>
+ <span class="k">def</span> <span class="nf">set_options</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">pipeline_options</span><span class="p">):</span>
+ <span class="sd">"""Set filesystem options.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> pipeline_options: Instance of ``PipelineOptions``.</span>
+<span class="sd"> """</span>
+ <span class="bp">cls</span><span class="o">.</span><span class="n">_pipeline_options</span> <span class="o">=</span> <span class="n">pipeline_options</span></div>
+
+<div class="viewcode-block" id="FileSystems.get_scheme"><a class="viewcode-back" href="../../../apache_beam.io.filesystems.html#apache_beam.io.filesystems.FileSystems.get_scheme">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">get_scheme</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
+ <span class="n">match_result</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">URI_SCHEMA_PATTERN</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">path</span><span class="o">.</span><span class="n">strip</span><span class="p">())</span>
+ <span class="k">if</span> <span class="n">match_result</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="k">return</span> <span class="kc">None</span>
+ <span class="k">return</span> <span class="n">match_result</span><span class="o">.</span><span class="n">groupdict</span><span class="p">()[</span><span class="s1">'scheme'</span><span class="p">]</span></div>
+
+<div class="viewcode-block" id="FileSystems.get_filesystem"><a class="viewcode-back" href="../../../apache_beam.io.filesystems.html#apache_beam.io.filesystems.FileSystems.get_filesystem">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">get_filesystem</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
+ <span class="sd">"""Get the correct filesystem for the specified path</span>
+<span class="sd"> """</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">path_scheme</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">get_scheme</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
+ <span class="n">systems</span> <span class="o">=</span> <span class="p">[</span><span class="n">fs</span> <span class="k">for</span> <span class="n">fs</span> <span class="ow">in</span> <span class="n">FileSystem</span><span class="o">.</span><span class="n">get_all_subclasses</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">fs</span><span class="o">.</span><span class="n">scheme</span><span class="p">()</span> <span class="o">==</span> <span class="n">path_scheme</span><span class="p">]</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">systems</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Unable to get the Filesystem for path </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">path</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="nb">len</span><span class="p">(</span><span class="n">systems</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">systems</span><span class="p">[</span><span class="mi">0</span><span class="p">](</span><span class="n">pipeline_options</span><span class="o">=</span><span class="n">FileSystems</span><span class="o">.</span><span class="n">_pipeline_options</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Found more than one filesystem for path </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">path</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span>
+ <span class="k">raise</span>
+ <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">BeamIOError</span><span class="p">(</span><span class="s1">'Unable to get the Filesystem'</span><span class="p">,</span> <span class="p">{</span><span class="n">path</span><span class="p">:</span> <span class="n">e</span><span class="p">})</span></div>
+
+<div class="viewcode-block" id="FileSystems.join"><a class="viewcode-back" href="../../../apache_beam.io.filesystems.html#apache_beam.io.filesystems.FileSystems.join">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">join</span><span class="p">(</span><span class="n">basepath</span><span class="p">,</span> <span class="o">*</span><span class="n">paths</span><span class="p">):</span>
+ <span class="sd">"""Join two or more pathname components for the filesystem</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> basepath: string path of the first component of the path</span>
+<span class="sd"> paths: path components to be added</span>
+
+<span class="sd"> Returns: full path after combining all the passed components</span>
+<span class="sd"> """</span>
+ <span class="n">filesystem</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">get_filesystem</span><span class="p">(</span><span class="n">basepath</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">filesystem</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">basepath</span><span class="p">,</span> <span class="o">*</span><span class="n">paths</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="FileSystems.split"><a class="viewcode-back" href="../../../apache_beam.io.filesystems.html#apache_beam.io.filesystems.FileSystems.split">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">split</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
+ <span class="sd">"""Splits the given path into two parts.</span>
+
+<span class="sd"> Splits the path into a pair (head, tail) such that tail contains the last</span>
+<span class="sd"> component of the path and head contains everything up to that.</span>
+
+<span class="sd"> For file-systems other than the local file-system, head should include the</span>
+<span class="sd"> prefix.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> path: path as a string</span>
+<span class="sd"> Returns:</span>
+<span class="sd"> a pair of path components as strings.</span>
+<span class="sd"> """</span>
+ <span class="n">filesystem</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">get_filesystem</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">filesystem</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="FileSystems.mkdirs"><a class="viewcode-back" href="../../../apache_beam.io.filesystems.html#apache_beam.io.filesystems.FileSystems.mkdirs">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">mkdirs</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
+ <span class="sd">"""Recursively create directories for the provided path.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> path: string path of the directory structure that should be created</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> IOError if leaf directory already exists.</span>
+<span class="sd"> """</span>
+ <span class="n">filesystem</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">get_filesystem</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">filesystem</span><span class="o">.</span><span class="n">mkdirs</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="FileSystems.match"><a class="viewcode-back" href="../../../apache_beam.io.filesystems.html#apache_beam.io.filesystems.FileSystems.match">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">match</span><span class="p">(</span><span class="n">patterns</span><span class="p">,</span> <span class="n">limits</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+ <span class="sd">"""Find all matching paths to the patterns provided.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd">      patterns: list of strings for the file path patterns to match against</span>
+<span class="sd"> limits: list of maximum number of responses that need to be fetched</span>
+
+<span class="sd"> Returns: list of ``MatchResult`` objects.</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ``BeamIOError`` if any of the pattern match operations fail</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">patterns</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">return</span> <span class="p">[]</span>
+ <span class="n">filesystem</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">get_filesystem</span><span class="p">(</span><span class="n">patterns</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+ <span class="k">return</span> <span class="n">filesystem</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">patterns</span><span class="p">,</span> <span class="n">limits</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="FileSystems.create"><a class="viewcode-back" href="../../../apache_beam.io.filesystems.html#apache_beam.io.filesystems.FileSystems.create">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">create</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">mime_type</span><span class="o">=</span><span class="s1">'application/octet-stream'</span><span class="p">,</span>
+ <span class="n">compression_type</span><span class="o">=</span><span class="n">CompressionTypes</span><span class="o">.</span><span class="n">AUTO</span><span class="p">):</span>
+ <span class="sd">"""Returns a write channel for the given file path.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> path: string path of the file object to be written to the system</span>
+<span class="sd"> mime_type: MIME type to specify the type of content in the file object</span>
+<span class="sd"> compression_type: Type of compression to be used for this object. See</span>
+<span class="sd"> ``CompressionTypes`` for possible values.</span>
+
+<span class="sd"> Returns: file handle with a ``close`` function for the user to use.</span>
+<span class="sd"> """</span>
+ <span class="n">filesystem</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">get_filesystem</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">filesystem</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">mime_type</span><span class="p">,</span> <span class="n">compression_type</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="FileSystems.open"><a class="viewcode-back" href="../../../apache_beam.io.filesystems.html#apache_beam.io.filesystems.FileSystems.open">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">mime_type</span><span class="o">=</span><span class="s1">'application/octet-stream'</span><span class="p">,</span>
+ <span class="n">compression_type</span><span class="o">=</span><span class="n">CompressionTypes</span><span class="o">.</span><span class="n">AUTO</span><span class="p">):</span>
+ <span class="sd">"""Returns a read channel for the given file path.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd">      path: string path of the file object to be read from the system</span>
+<span class="sd"> mime_type: MIME type to specify the type of content in the file object</span>
+<span class="sd"> compression_type: Type of compression to be used for this object. See</span>
+<span class="sd"> ``CompressionTypes`` for possible values.</span>
+
+<span class="sd"> Returns: file handle with a ``close`` function for the user to use.</span>
+<span class="sd"> """</span>
+ <span class="n">filesystem</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">get_filesystem</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">filesystem</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">mime_type</span><span class="p">,</span> <span class="n">compression_type</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="FileSystems.copy"><a class="viewcode-back" href="../../../apache_beam.io.filesystems.html#apache_beam.io.filesystems.FileSystems.copy">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">copy</span><span class="p">(</span><span class="n">source_file_names</span><span class="p">,</span> <span class="n">destination_file_names</span><span class="p">):</span>
+ <span class="sd">"""Recursively copy the file list from the source to the destination</span>
+
+<span class="sd"> Args:</span>
+<span class="sd">      source_file_names: list of source file objects that need to be copied</span>
+<span class="sd">      destination_file_names: list of destinations for the new objects</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ``BeamIOError`` if any of the copy operations fail</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">source_file_names</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">return</span>
+ <span class="n">filesystem</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">get_filesystem</span><span class="p">(</span><span class="n">source_file_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+ <span class="k">return</span> <span class="n">filesystem</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">source_file_names</span><span class="p">,</span> <span class="n">destination_file_names</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="FileSystems.rename"><a class="viewcode-back" href="../../../apache_beam.io.filesystems.html#apache_beam.io.filesystems.FileSystems.rename">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">rename</span><span class="p">(</span><span class="n">source_file_names</span><span class="p">,</span> <span class="n">destination_file_names</span><span class="p">):</span>
+ <span class="sd">"""Rename the files at the source list to the destination list.</span>
+<span class="sd"> Source and destination lists should be of the same size.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> source_file_names: List of file paths that need to be moved</span>
+<span class="sd"> destination_file_names: List of destination_file_names for the files</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ``BeamIOError`` if any of the rename operations fail</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">source_file_names</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">return</span>
+ <span class="n">filesystem</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">get_filesystem</span><span class="p">(</span><span class="n">source_file_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+ <span class="k">return</span> <span class="n">filesystem</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">source_file_names</span><span class="p">,</span> <span class="n">destination_file_names</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="FileSystems.exists"><a class="viewcode-back" href="../../../apache_beam.io.filesystems.html#apache_beam.io.filesystems.FileSystems.exists">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">exists</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
+ <span class="sd">"""Check if the provided path exists on the FileSystem.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> path: string path that needs to be checked.</span>
+
+<span class="sd"> Returns: boolean flag indicating if path exists</span>
+<span class="sd"> """</span>
+ <span class="n">filesystem</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">get_filesystem</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">filesystem</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="FileSystems.delete"><a class="viewcode-back" href="../../../apache_beam.io.filesystems.html#apache_beam.io.filesystems.FileSystems.delete">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">delete</span><span class="p">(</span><span class="n">paths</span><span class="p">):</span>
+ <span class="sd">"""Deletes files or directories at the provided paths.</span>
+<span class="sd"> Directories will be deleted recursively.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> paths: list of paths that give the file objects to be deleted</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ``BeamIOError`` if any of the delete operations fail</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">paths</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">return</span>
+ <span class="n">filesystem</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">get_filesystem</span><span class="p">(</span><span class="n">paths</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+ <span class="k">return</span> <span class="n">filesystem</span><span class="o">.</span><span class="n">delete</span><span class="p">(</span><span class="n">paths</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="FileSystems.get_chunk_size"><a class="viewcode-back" href="../../../apache_beam.io.filesystems.html#apache_beam.io.filesystems.FileSystems.get_chunk_size">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">get_chunk_size</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
+ <span class="sd">"""Get the correct chunk size for the FileSystem.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> path: string path that needs to be checked.</span>
+
+<span class="sd"> Returns: integer size for parallelization in the FS operations.</span>
+<span class="sd"> """</span>
+ <span class="n">filesystem</span> <span class="o">=</span> <span class="n">FileSystems</span><span class="o">.</span><span class="n">get_filesystem</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">filesystem</span><span class="o">.</span><span class="n">CHUNK_SIZE</span></div></div>
+</pre></div>
+
+ </div>
+ <div class="articleComments">
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright .
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true,
+ SOURCELINK_SUFFIX: '.txt'
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/gcp/bigquery.html b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/gcp/bigquery.html
new file mode 100644
index 0000000..4a078c1
--- /dev/null
+++ b/src/documentation/sdks/pydoc/2.4.0/_modules/apache_beam/io/gcp/bigquery.html
@@ -0,0 +1,1682 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>apache_beam.io.gcp.bigquery — Apache Beam documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="index" title="Index"
+ href="../../../../genindex.html"/>
+ <link rel="search" title="Search" href="../../../../search.html"/>
+ <link rel="top" title="Apache Beam documentation" href="../../../../index.html"/>
+ <link rel="up" title="Module code" href="../../../index.html"/>
+
+
+ <script src="../../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../../index.html" class="icon icon-home"> Apache Beam
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.coders.html">apache_beam.coders package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.internal.html">apache_beam.internal package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.io.html">apache_beam.io package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.metrics.html">apache_beam.metrics package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.options.html">apache_beam.options package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.portability.html">apache_beam.portability package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.runners.html">apache_beam.runners package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.testing.html">apache_beam.testing package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.transforms.html">apache_beam.transforms package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.typehints.html">apache_beam.typehints package</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.utils.html">apache_beam.utils package</a></li>
+</ul>
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.error.html">apache_beam.error module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../apache_beam.version.html">apache_beam.version module</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../../index.html">Apache Beam</a>
+
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+
+ <ul class="wy-breadcrumbs">
+
+ <li><a href="../../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../../index.html">Module code</a> »</li>
+
+ <li>apache_beam.io.gcp.bigquery</li>
+
+
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+
+ </ul>
+
+
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for apache_beam.io.gcp.bigquery</h1><div class="highlight"><pre>
+<span></span><span class="c1">#</span>
+<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
+<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
+<span class="c1"># this work for additional information regarding copyright ownership.</span>
+<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
+<span class="c1"># (the "License"); you may not use this file except in compliance with</span>
+<span class="c1"># the License. You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="c1">#</span>
+
+<span class="sd">"""BigQuery sources and sinks.</span>
+
+<span class="sd">This module implements reading from and writing to BigQuery tables. It relies</span>
+<span class="sd">on several classes exposed by the BigQuery API: TableSchema, TableFieldSchema,</span>
+<span class="sd">TableRow, and TableCell. The default mode is to return table rows read from a</span>
+<span class="sd">BigQuery source as dictionaries. Similarly a Write transform to a BigQuerySink</span>
+<span class="sd">accepts PCollections of dictionaries. This is done for more convenient</span>
+<span class="sd">programming. If desired, the native TableRow objects can be used throughout to</span>
+<span class="sd">represent rows (use an instance of TableRowJsonCoder as a coder argument when</span>
+<span class="sd">creating the sources or sinks respectively).</span>
+
+<span class="sd">Also, for programming convenience, instances of TableReference and TableSchema</span>
+<span class="sd">have a string representation that can be used for the corresponding arguments:</span>
+
+<span class="sd"> - TableReference can be a PROJECT:DATASET.TABLE or DATASET.TABLE string.</span>
+<span class="sd"> - TableSchema can be a NAME:TYPE{,NAME:TYPE}* string</span>
+<span class="sd"> (e.g. 'month:STRING,event_count:INTEGER').</span>
+
+<span class="sd">The syntax supported is described here:</span>
+<span class="sd">https://cloud.google.com/bigquery/bq-command-line-tool-quickstart</span>
+
+<span class="sd">BigQuery sources can be used as main inputs or side inputs. A main input</span>
+<span class="sd">(common case) is expected to be massive and will be split into manageable chunks</span>
+<span class="sd">and processed in parallel. Side inputs are expected to be small and will be read</span>
+<span class="sd">completely every time a ParDo DoFn gets executed. In the example below the</span>
+<span class="sd">lambda function implementing the DoFn for the Map transform will get on each</span>
+<span class="sd">call *one* row of the main table and *all* rows of the side table. The runner</span>
+<span class="sd">may use some caching techniques to share the side inputs between calls in order</span>
+<span class="sd">to avoid excessive reading::</span>
+
+<span class="sd">  main_table = pipeline | 'VeryBig' >> beam.io.Read(beam.io.BigQuerySource())</span>
+<span class="sd">  side_table = pipeline | 'NotBig' >> beam.io.Read(beam.io.BigQuerySource())</span>
+<span class="sd"> results = (</span>
+<span class="sd"> main_table</span>
+<span class="sd"> | 'ProcessData' >> beam.Map(</span>
+<span class="sd"> lambda element, side_input: ..., AsList(side_table)))</span>
+
+<span class="sd">There is no difference in how main and side inputs are read. What makes the</span>
+<span class="sd">side_table a 'side input' is the AsList wrapper used when passing the table</span>
+<span class="sd">as a parameter to the Map transform. AsList signals to the execution framework</span>
+<span class="sd">that its input should be made available whole.</span>
+
+<span class="sd">The main and side inputs are implemented differently. Reading a BigQuery table</span>
+<span class="sd">as main input entails exporting the table to a set of GCS files (currently in</span>
+<span class="sd">JSON format) and then processing those files. Reading the same table as a side</span>
+<span class="sd">input entails querying the table for all its rows. The coder argument on</span>
+<span class="sd">BigQuerySource controls the reading of the lines in the export files (i.e.,</span>
+<span class="sd">transform a JSON object into a PCollection element). The coder is not involved</span>
+<span class="sd">when the same table is read as a side input since there is no intermediate</span>
+<span class="sd">format involved. We get the table rows directly from the BigQuery service with</span>
+<span class="sd">a query.</span>
+
+<span class="sd">Users may provide a query to read from rather than reading all of a BigQuery</span>
+<span class="sd">table. If specified, the result obtained by executing the specified query will</span>
+<span class="sd">be used as the data of the input transform.::</span>
+
+<span class="sd"> query_results = pipeline | beam.io.Read(beam.io.BigQuerySource(</span>
+<span class="sd"> query='SELECT year, mean_temp FROM samples.weather_stations'))</span>
+
+<span class="sd">When creating a BigQuery input transform, users should provide either a query</span>
+<span class="sd">or a table. Pipeline construction will fail with a validation error if neither</span>
+<span class="sd">or both are specified.</span>
+
+<span class="sd">**Time partitioned tables**</span>
+
+<span class="sd">BigQuery sink currently does not fully support writing to BigQuery</span>
+<span class="sd">time partitioned tables. But writing to a *single* partition may work if</span>
+<span class="sd">that does not involve creating a new table (for example, when writing to an</span>
+<span class="sd">existing table with `create_disposition=CREATE_NEVER` and</span>
+<span class="sd">`write_disposition=WRITE_APPEND`).</span>
+<span class="sd">BigQuery source supports reading from a single time partition with the partition</span>
+<span class="sd">decorator specified as a part of the table identifier.</span>
+
+<span class="sd">**Short introduction to BigQuery concepts**</span>
+<span class="sd">Tables have rows (TableRow) and each row has cells (TableCell).</span>
+<span class="sd">A table has a schema (TableSchema), which in turn describes the schema of each</span>
+<span class="sd">cell (TableFieldSchema). The terms field and cell are used interchangeably.</span>
+
+<span class="sd">TableSchema: Describes the schema (types and order) for values in each row.</span>
+<span class="sd">  Has one attribute, 'fields', which is a list of TableFieldSchema objects.</span>
+
+<span class="sd">TableFieldSchema: Describes the schema (type, name) for one field.</span>
+<span class="sd"> Has several attributes, including 'name' and 'type'. Common values for</span>
+<span class="sd"> the type attribute are: 'STRING', 'INTEGER', 'FLOAT', 'BOOLEAN'. All possible</span>
+<span class="sd"> values are described at:</span>
+<span class="sd"> https://cloud.google.com/bigquery/preparing-data-for-bigquery#datatypes</span>
+
+<span class="sd">TableRow: Holds all values in a table row. Has one attribute, 'f', which is a</span>
+<span class="sd"> list of TableCell instances.</span>
+
+<span class="sd">TableCell: Holds the value for one cell (or field). Has one attribute,</span>
+<span class="sd"> 'v', which is a JsonValue instance. This class is defined in</span>
+<span class="sd"> apitools.base.py.extra_types.py module.</span>
+<span class="sd">"""</span>
+
+<span class="kn">from</span> <span class="nn">__future__</span> <span class="k">import</span> <span class="n">absolute_import</span>
+
+<span class="kn">import</span> <span class="nn">collections</span>
+<span class="kn">import</span> <span class="nn">datetime</span>
+<span class="kn">import</span> <span class="nn">json</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">import</span> <span class="nn">re</span>
+<span class="kn">import</span> <span class="nn">time</span>
+<span class="kn">import</span> <span class="nn">uuid</span>
+
+<span class="kn">from</span> <span class="nn">apache_beam</span> <span class="k">import</span> <span class="n">coders</span>
+<span class="kn">from</span> <span class="nn">apache_beam.internal.gcp</span> <span class="k">import</span> <span class="n">auth</span>
+<span class="kn">from</span> <span class="nn">apache_beam.internal.gcp.json_value</span> <span class="k">import</span> <span class="n">from_json_value</span>
+<span class="kn">from</span> <span class="nn">apache_beam.internal.gcp.json_value</span> <span class="k">import</span> <span class="n">to_json_value</span>
+<span class="kn">from</span> <span class="nn">apache_beam.io.gcp.internal.clients</span> <span class="k">import</span> <span class="n">bigquery</span>
+<span class="kn">from</span> <span class="nn">apache_beam.options.pipeline_options</span> <span class="k">import</span> <span class="n">GoogleCloudOptions</span>
+<span class="kn">from</span> <span class="nn">apache_beam.runners.dataflow.native_io</span> <span class="k">import</span> <span class="n">iobase</span> <span class="k">as</span> <span class="n">dataflow_io</span>
+<span class="kn">from</span> <span class="nn">apache_beam.transforms</span> <span class="k">import</span> <span class="n">DoFn</span>
+<span class="kn">from</span> <span class="nn">apache_beam.transforms</span> <span class="k">import</span> <span class="n">ParDo</span>
+<span class="kn">from</span> <span class="nn">apache_beam.transforms</span> <span class="k">import</span> <span class="n">PTransform</span>
+<span class="kn">from</span> <span class="nn">apache_beam.transforms.display</span> <span class="k">import</span> <span class="n">DisplayDataItem</span>
+<span class="kn">from</span> <span class="nn">apache_beam.utils</span> <span class="k">import</span> <span class="n">retry</span>
+
+<span class="c1"># Protect against environments where bigquery library is not available.</span>
+<span class="c1"># pylint: disable=wrong-import-order, wrong-import-position</span>
+<span class="k">try</span><span class="p">:</span>
+ <span class="kn">from</span> <span class="nn">apitools.base.py.exceptions</span> <span class="k">import</span> <span class="n">HttpError</span>
+<span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
+ <span class="k">pass</span>
+<span class="c1"># pylint: enable=wrong-import-order, wrong-import-position</span>
+
+
+<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="s1">'TableRowJsonCoder'</span><span class="p">,</span>
+ <span class="s1">'BigQueryDisposition'</span><span class="p">,</span>
+ <span class="s1">'BigQuerySource'</span><span class="p">,</span>
+ <span class="s1">'BigQuerySink'</span><span class="p">,</span>
+ <span class="s1">'WriteToBigQuery'</span><span class="p">,</span>
+ <span class="p">]</span>
+
+<span class="n">JSON_COMPLIANCE_ERROR</span> <span class="o">=</span> <span class="s1">'NAN, INF and -INF values are not JSON compliant.'</span>
+<span class="n">MAX_RETRIES</span> <span class="o">=</span> <span class="mi">3</span>
+
+
+<span class="k">class</span> <span class="nc">RowAsDictJsonCoder</span><span class="p">(</span><span class="n">coders</span><span class="o">.</span><span class="n">Coder</span><span class="p">):</span>
+ <span class="sd">"""A coder for a table row (represented as a dict) to/from a JSON string.</span>
+
+<span class="sd"> This is the default coder for sources and sinks if the coder argument is not</span>
+<span class="sd"> specified.</span>
+<span class="sd"> """</span>
+
+ <span class="k">def</span> <span class="nf">encode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">table_row</span><span class="p">):</span>
+ <span class="c1"># The normal error when dumping NAN/INF values is:</span>
+ <span class="c1"># ValueError: Out of range float values are not JSON compliant</span>
+ <span class="c1"># This code will catch this error to emit an error that explains</span>
+ <span class="c1"># to the programmer that they have used NAN/INF values.</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">table_row</span><span class="p">,</span> <span class="n">allow_nan</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">ValueError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'</span><span class="si">%s</span><span class="s1">. </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">e</span><span class="p">,</span> <span class="n">JSON_COMPLIANCE_ERROR</span><span class="p">))</span>
+
+ <span class="k">def</span> <span class="nf">decode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">encoded_table_row</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">encoded_table_row</span><span class="p">)</span>
+
+
+<div class="viewcode-block" id="TableRowJsonCoder"><a class="viewcode-back" href="../../../../apache_beam.io.gcp.bigquery.html#apache_beam.io.gcp.bigquery.TableRowJsonCoder">[docs]</a><span class="k">class</span> <span class="nc">TableRowJsonCoder</span><span class="p">(</span><span class="n">coders</span><span class="o">.</span><span class="n">Coder</span><span class="p">):</span>
+ <span class="sd">"""A coder for a TableRow instance to/from a JSON string.</span>
+
+<span class="sd"> Note that the encoding operation (used when writing to sinks) requires the</span>
+<span class="sd"> table schema in order to obtain the ordered list of field names. Reading from</span>
+<span class="sd"> sources on the other hand does not need the table schema.</span>
+<span class="sd"> """</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">table_schema</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+ <span class="c1"># The table schema is needed for encoding TableRows as JSON (writing to</span>
+ <span class="c1"># sinks) because the ordered list of field names is used in the JSON</span>
+ <span class="c1"># representation.</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">table_schema</span> <span class="o">=</span> <span class="n">table_schema</span>
+ <span class="c1"># Precompute field names since we need them for row encoding.</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">table_schema</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">field_names</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">fs</span><span class="o">.</span><span class="n">name</span> <span class="k">for</span> <span class="n">fs</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">table_schema</span><span class="o">.</span><span class="n">fields</span><span class="p">)</span>
+
+<div class="viewcode-block" id="TableRowJsonCoder.encode"><a class="viewcode-back" href="../../../../apache_beam.io.gcp.bigquery.html#apache_beam.io.gcp.bigquery.TableRowJsonCoder.encode">[docs]</a> <span class="k">def</span> <span class="nf">encode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">table_row</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">table_schema</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span>
+ <span class="s1">'The TableRowJsonCoder requires a table schema for '</span>
+ <span class="s1">'encoding operations. Please specify a table_schema argument.'</span><span class="p">)</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span>
+ <span class="n">collections</span><span class="o">.</span><span class="n">OrderedDict</span><span class="p">(</span>
+ <span class="nb">zip</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">field_names</span><span class="p">,</span>
+ <span class="p">[</span><span class="n">from_json_value</span><span class="p">(</span><span class="n">f</span><span class="o">.</span><span class="n">v</span><span class="p">)</span> <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">table_row</span><span class="o">.</span><span class="n">f</span><span class="p">])),</span>
+ <span class="n">allow_nan</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">ValueError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'</span><span class="si">%s</span><span class="s1">. </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">e</span><span class="p">,</span> <span class="n">JSON_COMPLIANCE_ERROR</span><span class="p">))</span></div>
+
+<div class="viewcode-block" id="TableRowJsonCoder.decode"><a class="viewcode-back" href="../../../../apache_beam.io.gcp.bigquery.html#apache_beam.io.gcp.bigquery.TableRowJsonCoder.decode">[docs]</a> <span class="k">def</span> <span class="nf">decode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">encoded_table_row</span><span class="p">):</span>
+ <span class="n">od</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span>
+ <span class="n">encoded_table_row</span><span class="p">,</span> <span class="n">object_pairs_hook</span><span class="o">=</span><span class="n">collections</span><span class="o">.</span><span class="n">OrderedDict</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">bigquery</span><span class="o">.</span><span class="n">TableRow</span><span class="p">(</span>
+ <span class="n">f</span><span class="o">=</span><span class="p">[</span><span class="n">bigquery</span><span class="o">.</span><span class="n">TableCell</span><span class="p">(</span><span class="n">v</span><span class="o">=</span><span class="n">to_json_value</span><span class="p">(</span><span class="n">e</span><span class="p">))</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">od</span><span class="o">.</span><span class="n" [...]
+
+
+<span class="k">def</span> <span class="nf">parse_table_schema_from_json</span><span class="p">(</span><span class="n">schema_string</span><span class="p">):</span>
+ <span class="sd">"""Parse the Table Schema provided as string.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> schema_string: String serialized table schema, should be a valid JSON.</span>
+
+<span class="sd"> Returns:</span>
+<span class="sd"> A TableSchema of the BigQuery export from either the Query or the Table.</span>
+<span class="sd"> """</span>
+ <span class="n">json_schema</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">schema_string</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">_parse_schema_field</span><span class="p">(</span><span class="n">field</span><span class="p">):</span>
+ <span class="sd">"""Parse a single schema field from dictionary.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> field: Dictionary object containing serialized schema.</span>
+
+<span class="sd"> Returns:</span>
+<span class="sd"> A TableFieldSchema for a single column in BigQuery.</span>
+<span class="sd"> """</span>
+ <span class="n">schema</span> <span class="o">=</span> <span class="n">bigquery</span><span class="o">.</span><span class="n">TableFieldSchema</span><span class="p">()</span>
+ <span class="n">schema</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">field</span><span class="p">[</span><span class="s1">'name'</span><span class="p">]</span>
+ <span class="n">schema</span><span class="o">.</span><span class="n">type</span> <span class="o">=</span> <span class="n">field</span><span class="p">[</span><span class="s1">'type'</span><span class="p">]</span>
+ <span class="k">if</span> <span class="s1">'mode'</span> <span class="ow">in</span> <span class="n">field</span><span class="p">:</span>
+ <span class="n">schema</span><span class="o">.</span><span class="n">mode</span> <span class="o">=</span> <span class="n">field</span><span class="p">[</span><span class="s1">'mode'</span><span class="p">]</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">schema</span><span class="o">.</span><span class="n">mode</span> <span class="o">=</span> <span class="s1">'NULLABLE'</span>
+ <span class="k">if</span> <span class="s1">'description'</span> <span class="ow">in</span> <span class="n">field</span><span class="p">:</span>
+ <span class="n">schema</span><span class="o">.</span><span class="n">description</span> <span class="o">=</span> <span class="n">field</span><span class="p">[</span><span class="s1">'description'</span><span class="p">]</span>
+ <span class="k">if</span> <span class="s1">'fields'</span> <span class="ow">in</span> <span class="n">field</span><span class="p">:</span>
+ <span class="n">schema</span><span class="o">.</span><span class="n">fields</span> <span class="o">=</span> <span class="p">[</span><span class="n">_parse_schema_field</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">field</span><span class="p">[</span><span class="s1">'fields'</span><span class="p">]]</span>
+ <span class="k">return</span> <span class="n">schema</span>
+
+ <span class="n">fields</span> <span class="o">=</span> <span class="p">[</span><span class="n">_parse_schema_field</span><span class="p">(</span><span class="n">f</span><span class="p">)</span> <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">json_schema</span><span class="p">[</span><span class="s1">'fields'</span><span class="p">]]</span>
+ <span class="k">return</span> <span class="n">bigquery</span><span class="o">.</span><span class="n">TableSchema</span><span class="p">(</span><span class="n">fields</span><span class="o">=</span><span class="n">fields</span><span class="p">)</span>
+
+
+<div class="viewcode-block" id="BigQueryDisposition"><a class="viewcode-back" href="../../../../apache_beam.io.gcp.bigquery.html#apache_beam.io.gcp.bigquery.BigQueryDisposition">[docs]</a><span class="k">class</span> <span class="nc">BigQueryDisposition</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">"""Class holding standard strings used for create and write dispositions."""</span>
+
+ <span class="n">CREATE_NEVER</span> <span class="o">=</span> <span class="s1">'CREATE_NEVER'</span>
+ <span class="n">CREATE_IF_NEEDED</span> <span class="o">=</span> <span class="s1">'CREATE_IF_NEEDED'</span>
+ <span class="n">WRITE_TRUNCATE</span> <span class="o">=</span> <span class="s1">'WRITE_TRUNCATE'</span>
+ <span class="n">WRITE_APPEND</span> <span class="o">=</span> <span class="s1">'WRITE_APPEND'</span>
+ <span class="n">WRITE_EMPTY</span> <span class="o">=</span> <span class="s1">'WRITE_EMPTY'</span>
+
+<div class="viewcode-block" id="BigQueryDisposition.validate_create"><a class="viewcode-back" href="../../../../apache_beam.io.gcp.bigquery.html#apache_beam.io.gcp.bigquery.BigQueryDisposition.validate_create">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">validate_create</span><span class="p">(</span><span class="n">disposition</span><span class="p">):</span>
+ <span class="n">values</span> <span class="o">=</span> <span class="p">(</span><span class="n">BigQueryDisposition</span><span class="o">.</span><span class="n">CREATE_NEVER</span><span class="p">,</span>
+ <span class="n">BigQueryDisposition</span><span class="o">.</span><span class="n">CREATE_IF_NEEDED</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">disposition</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">values</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
+ <span class="s1">'Invalid create disposition </span><span class="si">%s</span><span class="s1">. Expecting </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">disposition</span><span class="p">,</span> <span class="n">values</span><span class="p">))</span>
+ <span class="k">return</span> <span class="n">disposition</span></div>
+
+<div class="viewcode-block" id="BigQueryDisposition.validate_write"><a class="viewcode-back" href="../../../../apache_beam.io.gcp.bigquery.html#apache_beam.io.gcp.bigquery.BigQueryDisposition.validate_write">[docs]</a> <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">validate_write</span><span class="p">(</span><span class="n">disposition</span><span class="p">):</span>
+ <span class="n">values</span> <span class="o">=</span> <span class="p">(</span><span class="n">BigQueryDisposition</span><span class="o">.</span><span class="n">WRITE_TRUNCATE</span><span class="p">,</span>
+ <span class="n">BigQueryDisposition</span><span class="o">.</span><span class="n">WRITE_APPEND</span><span class="p">,</span>
+ <span class="n">BigQueryDisposition</span><span class="o">.</span><span class="n">WRITE_EMPTY</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">disposition</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">values</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
+ <span class="s1">'Invalid write disposition </span><span class="si">%s</span><span class="s1">. Expecting </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">disposition</span><span class="p">,</span> <span class="n">values</span><span class="p">))</span>
+ <span class="k">return</span> <span class="n">disposition</span></div></div>
+
+
+<span class="k">def</span> <span class="nf">_parse_table_reference</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">dataset</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">project</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+ <span class="sd">"""Parses a table reference into a (project, dataset, table) tuple.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> table: The ID of the table. The ID must contain only letters</span>
+<span class="sd"> (a-z, A-Z), numbers (0-9), or underscores (_). If dataset argument is None</span>
+<span class="sd"> then the table argument must contain the entire table reference:</span>
+<span class="sd"> 'DATASET.TABLE' or 'PROJECT:DATASET.TABLE'. This argument can be a</span>
+<span class="sd"> bigquery.TableReference instance in which case dataset and project are</span>
+<span class="sd"> ignored and the reference is returned as a result. Additionally, for date</span>
+<span class="sd"> partitioned tables, appending '$YYYYmmdd' to the table name is supported,</span>
+<span class="sd"> e.g. 'DATASET.TABLE$YYYYmmdd'.</span>
+<span class="sd"> dataset: The ID of the dataset containing this table or null if the table</span>
+<span class="sd"> reference is specified entirely by the table argument.</span>
+<span class="sd"> project: The ID of the project containing this table or null if the table</span>
+<span class="sd"> reference is specified entirely by the table (and possibly dataset)</span>
+<span class="sd"> argument.</span>
+
+<span class="sd"> Returns:</span>
+<span class="sd"> A bigquery.TableReference object. The object has the following attributes:</span>
+<span class="sd"> projectId, datasetId, and tableId.</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ValueError: if the table reference as a string does not match the expected</span>
+<span class="sd"> format.</span>
+<span class="sd"> """</span>
+
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">bigquery</span><span class="o">.</span><span class="n">TableReference</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">table</span>
+
+ <span class="n">table_reference</span> <span class="o">=</span> <span class="n">bigquery</span><span class="o">.</span><span class="n">TableReference</span><span class="p">()</span>
+ <span class="c1"># If dataset argument is not specified, the expectation is that the</span>
+ <span class="c1"># table argument will contain a full table reference instead of just a</span>
+ <span class="c1"># table name.</span>
+ <span class="k">if</span> <span class="n">dataset</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">match</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span>
+ <span class="sa">r</span><span class="s1">'^((?P<project>.+):)?(?P<dataset>\w+)\.(?P<table>[\w\$]+)$'</span><span class="p">,</span> <span class="n">table</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">match</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
+ <span class="s1">'Expected a table reference (PROJECT:DATASET.TABLE or '</span>
+ <span class="s1">'DATASET.TABLE) instead of </span><span class="si">%s</span><span class="s1">.'</span> <span class="o">%</span> <span class="n">table</span><span class="p">)</span>
+ <span class="n">table_reference</span><span class="o">.</span><span class="n">projectId</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="s1">'project'</span><span class="p">)</span>
+ <span class="n">table_reference</span><span class="o">.</span><span class="n">datasetId</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="s1">'dataset'</span><span class="p">)</span>
+ <span class="n">table_reference</span><span class="o">.</span><span class="n">tableId</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="s1">'table'</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">table_reference</span><span class="o">.</span><span class="n">projectId</span> <span class="o">=</span> <span class="n">project</span>
+ <span class="n">table_reference</span><span class="o">.</span><span class="n">datasetId</span> <span class="o">=</span> <span class="n">dataset</span>
+ <span class="n">table_reference</span><span class="o">.</span><span class="n">tableId</span> <span class="o">=</span> <span class="n">table</span>
+ <span class="k">return</span> <span class="n">table_reference</span>
+
+
+<span class="c1"># -----------------------------------------------------------------------------</span>
+<span class="c1"># BigQuerySource, BigQuerySink.</span>
+
+
+<div class="viewcode-block" id="BigQuerySource"><a class="viewcode-back" href="../../../../apache_beam.io.gcp.bigquery.html#apache_beam.io.gcp.bigquery.BigQuerySource">[docs]</a><span class="k">class</span> <span class="nc">BigQuerySource</span><span class="p">(</span><span class="n">dataflow_io</span><span class="o">.</span><span class="n">NativeSource</span><span class="p">):</span>
+ <span class="sd">"""A source based on a BigQuery table."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">table</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">dataset</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">project</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">query</span><span c [...]
+ <span class="n">validate</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">coder</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">use_standard_sql</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
+ <span class="n">flatten_results</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
+ <span class="sd">"""Initialize a :class:`BigQuerySource`.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> table (str): The ID of a BigQuery table. If specified all data of the</span>
+<span class="sd"> table will be used as input of the current source. The ID must contain</span>
+<span class="sd"> only letters ``a-z``, ``A-Z``, numbers ``0-9``, or underscores</span>
+<span class="sd"> ``_``. If dataset and query arguments are :data:`None` then the table</span>
+<span class="sd"> argument must contain the entire table reference specified as:</span>
+<span class="sd"> ``'DATASET.TABLE'`` or ``'PROJECT:DATASET.TABLE'``.</span>
+<span class="sd"> dataset (str): The ID of the dataset containing this table or</span>
+<span class="sd"> :data:`None` if the table reference is specified entirely by the table</span>
+<span class="sd"> argument or a query is specified.</span>
+<span class="sd"> project (str): The ID of the project containing this table or</span>
+<span class="sd"> :data:`None` if the table reference is specified entirely by the table</span>
+<span class="sd"> argument or a query is specified.</span>
+<span class="sd"> query (str): A query to be used instead of arguments table, dataset, and</span>
+<span class="sd"> project.</span>
+<span class="sd"> validate (bool): If :data:`True`, various checks will be done when source</span>
+<span class="sd"> gets initialized (e.g., is table present?). This should be</span>
+<span class="sd"> :data:`True` for most scenarios in order to catch errors as early as</span>
+<span class="sd"> possible (pipeline construction instead of pipeline execution). It</span>
+<span class="sd"> should be :data:`False` if the table is created during pipeline</span>
+<span class="sd"> execution by a previous step.</span>
+<span class="sd"> coder (~apache_beam.coders.coders.Coder): The coder for the table</span>
+<span class="sd"> rows if serialized to disk. If :data:`None`, then the default coder is</span>
+<span class="sd"> :class:`~apache_beam.io.gcp.bigquery.RowAsDictJsonCoder`,</span>
+<span class="sd"> which will interpret every line in a file as a JSON serialized</span>
+<span class="sd"> dictionary. This argument needs a value only in special cases when</span>
+<span class="sd"> returning table rows as dictionaries is not desirable.</span>
+<span class="sd"> use_standard_sql (bool): Specifies whether to use BigQuery's standard SQL</span>
+<span class="sd"> dialect for this query. The default value is :data:`False`.</span>
+<span class="sd"> If set to :data:`True`, the query will use BigQuery's updated SQL</span>
+<span class="sd"> dialect with improved standards compliance.</span>
+<span class="sd"> This parameter is ignored for table inputs.</span>
+<span class="sd"> flatten_results (bool): Flattens all nested and repeated fields in the</span>
+<span class="sd"> query results. The default value is :data:`True`.</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ~exceptions.ValueError: if any of the following is true:</span>
+
+<span class="sd"> 1) the table reference as a string does not match the expected format</span>
+<span class="sd"> 2) neither a table nor a query is specified</span>
+<span class="sd"> 3) both a table and a query is specified.</span>
+<span class="sd"> """</span>
+
+ <span class="c1"># Import here to avoid adding the dependency for local running scenarios.</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="c1"># pylint: disable=wrong-import-order, wrong-import-position</span>
+ <span class="kn">from</span> <span class="nn">apitools.base</span> <span class="k">import</span> <span class="n">py</span> <span class="c1"># pylint: disable=unused-variable</span>
+ <span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ImportError</span><span class="p">(</span>
+ <span class="s1">'Google Cloud IO not available, '</span>
+ <span class="s1">'please install apache_beam[gcp]'</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="n">table</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">query</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Both a BigQuery table and a query were specified.'</span>
+ <span class="s1">' Please specify only one of these.'</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="n">table</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">query</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'A BigQuery table or a query must be specified'</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="n">table</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">table_reference</span> <span class="o">=</span> <span class="n">_parse_table_reference</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">dataset</span><span class="p">,</span> <span class="n">project</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">query</span> <span class="o">=</span> <span class="kc">None</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">use_legacy_sql</span> <span class="o">=</span> <span class="kc">True</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">query</span> <span class="o">=</span> <span class="n">query</span>
+ <span class="c1"># TODO(BEAM-1082): Change the internal flag to be standard_sql</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">use_legacy_sql</span> <span class="o">=</span> <span class="ow">not</span> <span class="n">use_standard_sql</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">table_reference</span> <span class="o">=</span> <span class="kc">None</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">validate</span> <span class="o">=</span> <span class="n">validate</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">flatten_results</span> <span class="o">=</span> <span class="n">flatten_results</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">coder</span> <span class="o">=</span> <span class="n">coder</span> <span class="ow">or</span> <span class="n">RowAsDictJsonCoder</span><span class="p">()</span>
+
+<div class="viewcode-block" id="BigQuerySource.display_data"><a class="viewcode-back" href="../../../../apache_beam.io.gcp.bigquery.html#apache_beam.io.gcp.bigquery.BigQuerySource.display_data">[docs]</a> <span class="k">def</span> <span class="nf">display_data</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">query</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">res</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'query'</span><span class="p">:</span> <span class="n">DisplayDataItem</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">query</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s1">'Query'</span><span class="p">)}</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">table_reference</span><span class="o">.</span><span class="n">projectId</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">tableSpec</span> <span class="o">=</span> <span class="s1">'</span><span class="si">{}</span><span class="s1">:</span><span class="si">{}</span><span class="s1">.</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">table_reference</span><span class="o">.</span><span class="n">projectId</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">table_reference</span><span class="o">.</span><span class="n">datasetId</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">table_reference</span><span class="o">.</span><span class="n">tableId</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">tableSpec</span> <span class="o">=</span> <span class="s1">'</span><span class="si">{}</span><span class="s1">.</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">table_reference</span><span class="o">.</span><span class="n">datasetId</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">table_reference</span><span class="o">.</span><span class="n">tableId</span><span class="p">)</span>
+ <span class="n">res</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'table'</span><span class="p">:</span> <span class="n">DisplayDataItem</span><span class="p">(</span><span class="n">tableSpec</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s1">'Table'</span><span class="p">)}</span>
+
+ <span class="n">res</span><span class="p">[</span><span class="s1">'validation'</span><span class="p">]</span> <span class="o">=</span> <span class="n">DisplayDataItem</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">validate</span><span class="p">,</span>
+ <span class="n">label</span><span class="o">=</span><span class="s1">'Validation Enabled'</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">res</span></div>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">format</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Source format name required for remote execution."""</span>
+ <span class="k">return</span> <span class="s1">'bigquery'</span>
+
+<div class="viewcode-block" id="BigQuerySource.reader"><a class="viewcode-back" href="../../../../apache_beam.io.gcp.bigquery.html#apache_beam.io.gcp.bigquery.BigQuerySource.reader">[docs]</a> <span class="k">def</span> <span class="nf">reader</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">test_bigquery_client</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">BigQueryReader</span><span class="p">(</span>
+ <span class="n">source</span><span class="o">=</span><span class="bp">self</span><span class="p">,</span>
+ <span class="n">test_bigquery_client</span><span class="o">=</span><span class="n">test_bigquery_client</span><span class="p">,</span>
+ <span class="n">use_legacy_sql</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">use_legacy_sql</span><span class="p">,</span>
+ <span class="n">flatten_results</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">flatten_results</span><span class="p">)</span></div></div>
+
+
+<div class="viewcode-block" id="BigQuerySink"><a class="viewcode-back" href="../../../../apache_beam.io.gcp.bigquery.html#apache_beam.io.gcp.bigquery.BigQuerySink">[docs]</a><span class="k">class</span> <span class="nc">BigQuerySink</span><span class="p">(</span><span class="n">dataflow_io</span><span class="o">.</span><span class="n">NativeSink</span><span class="p">):</span>
+ <span class="sd">"""A sink based on a BigQuery table."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">table</span><span class="p">,</span> <span class="n">dataset</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">project</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">schema</span><span class="o">=</span><span class="kc">None</span><span [...]
+ <span class="n">create_disposition</span><span class="o">=</span><span class="n">BigQueryDisposition</span><span class="o">.</span><span class="n">CREATE_IF_NEEDED</span><span class="p">,</span>
+ <span class="n">write_disposition</span><span class="o">=</span><span class="n">BigQueryDisposition</span><span class="o">.</span><span class="n">WRITE_EMPTY</span><span class="p">,</span>
+ <span class="n">validate</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">coder</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+ <span class="sd">"""Initialize a BigQuerySink.</span>
+
+<span class="sd"> Args:</span>
+<span class="sd"> table (str): The ID of the table. The ID must contain only letters</span>
+<span class="sd"> ``a-z``, ``A-Z``, numbers ``0-9``, or underscores ``_``. If</span>
+<span class="sd"> **dataset** argument is :data:`None` then the table argument must</span>
+<span class="sd"> contain the entire table reference specified as: ``'DATASET.TABLE'`` or</span>
+<span class="sd"> ``'PROJECT:DATASET.TABLE'``.</span>
+<span class="sd"> dataset (str): The ID of the dataset containing this table or</span>
+<span class="sd"> :data:`None` if the table reference is specified entirely by the table</span>
+<span class="sd"> argument.</span>
+<span class="sd"> project (str): The ID of the project containing this table or</span>
+<span class="sd"> :data:`None` if the table reference is specified entirely by the table</span>
+<span class="sd"> argument.</span>
+<span class="sd"> schema (str): The schema to be used if the BigQuery table to write has</span>
+<span class="sd"> to be created. This can be either specified as a</span>
+<span class="sd"> :class:`~apache_beam.io.gcp.internal.clients.bigquery.\</span>
+<span class="sd">bigquery_v2_messages.TableSchema` object or a single string of the form</span>
+<span class="sd"> ``'field1:type1,field2:type2,field3:type3'`` that defines a comma</span>
+<span class="sd"> separated list of fields. Here ``'type'`` should specify the BigQuery</span>
+<span class="sd"> type of the field. Single string based schemas do not support nested</span>
+<span class="sd"> fields, repeated fields, or specifying a BigQuery mode for fields (mode</span>
+<span class="sd"> will always be set to ``'NULLABLE'``).</span>
+<span class="sd"> create_disposition (BigQueryDisposition): A string describing what</span>
+<span class="sd"> happens if the table does not exist. Possible values are:</span>
+
+<span class="sd"> * :attr:`BigQueryDisposition.CREATE_IF_NEEDED`: create if does not</span>
+<span class="sd"> exist.</span>
+<span class="sd"> * :attr:`BigQueryDisposition.CREATE_NEVER`: fail the write if does not</span>
+<span class="sd"> exist.</span>
+
+<span class="sd"> write_disposition (BigQueryDisposition): A string describing what</span>
+<span class="sd"> happens if the table has already some data. Possible values are:</span>
+
+<span class="sd"> * :attr:`BigQueryDisposition.WRITE_TRUNCATE`: delete existing rows.</span>
+<span class="sd"> * :attr:`BigQueryDisposition.WRITE_APPEND`: add to existing rows.</span>
+<span class="sd"> * :attr:`BigQueryDisposition.WRITE_EMPTY`: fail the write if table not</span>
+<span class="sd"> empty.</span>
+
+<span class="sd"> validate (bool): If :data:`True`, various checks will be done when sink</span>
+<span class="sd"> gets initialized (e.g., is table present given the disposition</span>
+<span class="sd"> arguments?). This should be :data:`True` for most scenarios in order to</span>
+<span class="sd"> catch errors as early as possible (pipeline construction instead of</span>
+<span class="sd"> pipeline execution). It should be :data:`False` if the table is created</span>
+<span class="sd"> during pipeline execution by a previous step.</span>
+<span class="sd"> coder (~apache_beam.coders.coders.Coder): The coder for the</span>
+<span class="sd"> table rows if serialized to disk. If :data:`None`, then the default</span>
+<span class="sd"> coder is :class:`~apache_beam.io.gcp.bigquery.RowAsDictJsonCoder`,</span>
+<span class="sd"> which will interpret every element written to the sink as a dictionary</span>
+<span class="sd"> that will be JSON serialized as a line in a file. This argument needs a</span>
+<span class="sd"> value only in special cases when writing table rows as dictionaries is</span>
+<span class="sd"> not desirable.</span>
+
+<span class="sd"> Raises:</span>
+<span class="sd"> ~exceptions.TypeError: if the schema argument is not a :class:`str` or a</span>
+<span class="sd"> :class:`~apache_beam.io.gcp.internal.clients.bigquery.\</span>
+<span class="sd">bigquery_v2_messages.TableSchema` object.</span>
+<span class="sd"> ~exceptions.ValueError: if the table reference as a string does not</span>
+<span class="sd"> match the expected format.</span>
+<span class="sd"> """</span>
+
+ <span class="c1"># Import here to avoid adding the dependency for local running scenarios.</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="c1"># pylint: disable=wrong-import-order, wrong-import-position</span>
+ <span class="kn">from</span> <span class="nn">apitools.base</span> <span class="k">import</span> <span class="n">py</span> <span class="c1"># pylint: disable=unused-variable</span>
+ <span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ImportError</span><span class="p">(</span>
+ <span class="s1">'Google Cloud IO not available, '</span>
+ <span class="s1">'please install apache_beam[gcp]'</span><span class="p">)</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">table_reference</span> <span class="o">=</span> <span class="n">_parse_table_reference</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">dataset</span><span class="p">,</span> <span class="n">project</span><span class="p">)</span>
+ <span class="c1"># Transform the table schema into a bigquery.TableSchema instance.</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">basestring</span><span class="p">):</span>
+ <span class="c1"># TODO(silviuc): Should add a regex-based validation of the format.</span>
+ <span class="n">table_schema</span> <span class="o">=</span> <span class="n">bigquery</span><span class="o">.</span><span class="n">TableSchema</span><span class="p">()</span>
+ <span class="n">schema_list</span> <span class="o">=</span> <span class="p">[</span><span class="n">s</span><span class="o">.</span><span class="n">strip</span><span class="p">(</span><span class="s1">' '</span><span class="p">)</span> <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">schema</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">','</span><span class="p">)]</span>
+ <span class="k">for</span> <span class="n">field_and_type</span> <span class="ow">in</span> <span class="n">schema_list</span><span class="p">:</span>
+ <span class="n">field_name</span><span class="p">,</span> <span class="n">field_type</span> <span class="o">=</span> <span class="n">field_and_type</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">':'</span><span class="p">)</span>
+ <span class="n">field_schema</span> <span class="o">=</span> <span class="n">bigquery</span><span class="o">.</span><span class="n">TableFieldSchema</span><span class="p">()</span>
+ <span class="n">field_schema</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">field_name</span>
+ <span class="n">field_schema</span><span class="o">.</span><span class="n">type</span> <span class="o">=</span> <span class="n">field_type</span>
+ <span class="n">field_schema</span><span class="o">.</span><span class="n">mode</span> <span class="o">=</span> <span class="s1">'NULLABLE'</span>
+ <span class="n">table_schema</span><span class="o">.</span><span class="n">fields</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">field_schema</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">table_schema</span> <span class="o">=</span> <span class="n">table_schema</span>
+ <span class="k">elif</span> <span class="n">schema</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="c1"># TODO(silviuc): Should check that table exists if no schema specified.</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">table_schema</span> <span class="o">=</span> <span class="n">schema</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">bigquery</span><span class="o">.</span><span class="n">TableSchema</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">table_schema</span> <span class="o">=</span> <span class="n">schema</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'Unexpected schema argument: </span><span class="si">%s</span><span class="s1">.'</span> <span class="o">%</span> <span class="n">schema</span><span class="p">)</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">create_disposition</span> <span class="o">=</span> <span class="n">BigQueryDisposition</span><span class="o">.</span><span class="n">validate_create</span><span class="p">(</span>
+ <span class="n">create_disposition</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">write_disposition</span> <span class="o">=</span> <span class="n">BigQueryDisposition</span><span class="o">.</span><span class="n">validate_write</span><span class="p">(</span>
+ <span class="n">write_disposition</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">validate</span> <span class="o">=</span> <span class="n">validate</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">coder</span> <span class="o">=</span> <span class="n">coder</span> <span class="ow">or</span> <span class="n">RowAsDictJsonCoder</span><span class="p">()</span>
+
+<div class="viewcode-block" id="BigQuerySink.display_data"><a class="viewcode-back" href="../../../../apache_beam.io.gcp.bigquery.html#apache_beam.io.gcp.bigquery.BigQuerySink.display_data">[docs]</a> <span class="k">def</span> <span class="nf">display_data</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">res</span> <span class="o">=</span> <span class="p">{}</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">table_reference</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">tableSpec</span> <span class="o">=</span> <span class="s1">'</span><span class="si">{}</span><span class="s1">.</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">table_reference</span><span class="o">.</span><span class="n">datasetId</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">table_reference</span><span class="o">.</span><span class="n">tableId</span><span class="p">)</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">table_reference</span><span class="o">.</span><span class="n">projectId</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">tableSpec</span> <span class="o">=</span> <span class="s1">'</span><span class="si">{}</span><span class="s1">:</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">table_reference</span><span class="o">.</span><span class="n">projectId</span><span class="p">,</span>
+ <span class="n">tableSpec</span><span class="p">)</span>
+ <span class="n">res</span><span class="p">[</span><span class="s1">'table'</span><span class="p">]</span> <span class="o">=</span> <span class="n">DisplayDataItem</span><span class="p">(</span><span class="n">tableSpec</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s1">'Table'</span><span class="p">)</span>
+
+ <span class="n">res</span><span class="p">[</span><span class="s1">'validation'</span><span class="p">]</span> <span class="o">=</span> <span class="n">DisplayDataItem</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">validate</span><span class="p">,</span>
+ <span class="n">label</span><span class="o">=</span><span class="s2">"Validation Enabled"</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">res</span></div>
+
+<div class="viewcode-block" id="BigQuerySink.schema_as_json"><a class="viewcode-back" href="../../../../apache_beam.io.gcp.bigquery.html#apache_beam.io.gcp.bigquery.BigQuerySink.schema_as_json">[docs]</a> <span class="k">def</span> <span class="nf">schema_as_json</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Returns the TableSchema associated with the sink as a JSON string."""</span>
+
+ <span class="k">def</span> <span class="nf">schema_list_as_object</span><span class="p">(</span><span class="n">schema_list</span><span class="p">):</span>
+ <span class="sd">"""Returns a list of TableFieldSchema objects as a list of dicts."""</span>
+ <span class="n">fields</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">schema_list</span><span class="p">:</span>
+ <span class="n">fs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'name'</span><span class="p">:</span> <span class="n">f</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="s1">'type'</span><span class="p">:</span> <span class="n">f</span><span class="o">.</span><span class="n">type</span><span class="p">}</span>
+ <span class="k">if</span> <span class="n">f</span><span class="o">.</span><span class="n">description</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">fs</span><span class="p">[</span><span class="s1">'description'</span><span class="p">]</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">description</span>
+ <span class="k">if</span> <span class="n">f</span><span class="o">.</span><span class="n">mode</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">fs</span><span class="p">[</span><span class="s1">'mode'</span><span class="p">]</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">mode</span>
+ <span class="k">if</span> <span class="n">f</span><span class="o">.</span><span class="n">type</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="o">==</span> <span class="s1">'record'</span><span class="p">:</span>
+ <span class="n">fs</span><span class="p">[</span><span class="s1">'fields'</span><span class="p">]</span> <span class="o">=</span> <span class="n">schema_list_as_object</span><span class="p">(</span><span class="n">f</span><span class="o">.</span><span class="n">fields</span><span class="p">)</span>
+ <span class="n">fields</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">fs</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">fields</span>
+ <span class="k">return</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span>
+ <span class="p">{</span><span class="s1">'fields'</span><span class="p">:</span> <span class="n">schema_list_as_object</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">table_schema</span><span class="o">.</span><span class="n">fields</span><span class="p">)})</span></div>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">format</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Sink format name required for remote execution."""</span>
+ <span class="k">return</span> <span class="s1">'bigquery'</span>
+
+<div class="viewcode-block" id="BigQuerySink.writer"><a class="viewcode-back" href="../../../../apache_beam.io.gcp.bigquery.html#apache_beam.io.gcp.bigquery.BigQuerySink.writer">[docs]</a> <span class="k">def</span> <span class="nf">writer</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">test_bigquery_client</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">buffer_size</span><span class="o [...]
+ <span class="k">return</span> <span class="n">BigQueryWriter</span><span class="p">(</span>
+ <span class="n">sink</span><span class="o">=</span><span class="bp">self</span><span class="p">,</span> <span class="n">test_bigquery_client</span><span class="o">=</span><span class="n">test_bigquery_client</span><span class="p">,</span>
+ <span class="n">buffer_size</span><span class="o">=</span><span class="n">buffer_size</span><span class="p">)</span></div></div>
+
+
+<span class="c1"># -----------------------------------------------------------------------------</span>
+<span class="c1"># BigQueryReader, BigQueryWriter.</span>
+
+
+<span class="k">class</span> <span class="nc">BigQueryReader</span><span class="p">(</span><span class="n">dataflow_io</span><span class="o">.</span><span class="n">NativeSourceReader</span><span class="p">):</span>
+ <span class="sd">"""A reader for a BigQuery source."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source</span><span class="p">,</span> <span class="n">test_bigquery_client</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">use_legacy_sql</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
+ <span class="n">flatten_results</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">source</span> <span class="o">=</span> <span class="n">source</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">test_bigquery_client</span> <span class="o">=</span> <span class="n">test_bigquery_client</span>
+ <span class="k">if</span> <span class="n">auth</span><span class="o">.</span><span class="n">is_running_in_gce</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">executing_project</span> <span class="o">=</span> <span class="n">auth</span><span class="o">.</span><span class="n">executing_project</span>
+ <span class="k">elif</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">source</span><span class="p">,</span> <span class="s1">'pipeline_options'</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">executing_project</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="n">source</span><span class="o">.</span><span class="n">pipeline_options</span><span class="o">.</span><span class="n">view_as</span><span class="p">(</span><span class="n">GoogleCloudOptions</span><span class="p">)</span><span class="o">.</span><span class="n">project</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">executing_project</span> <span class="o">=</span> <span class="kc">None</span>
+
+ <span class="c1"># TODO(silviuc): Try to automatically get it from gcloud config info.</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">executing_project</span> <span class="ow">and</span> <span class="n">test_bigquery_client</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
+ <span class="s1">'Missing executing project information. Please use the --project '</span>
+ <span class="s1">'command line option to specify it.'</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">row_as_dict</span> <span class="o">=</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">source</span><span class="o">.</span><span class="n">coder</span><span class="p">,</span> <span class="n">RowAsDictJsonCoder</span><span class="p">)</span>
+ <span class="c1"># Schema for the rows being read by the reader. It is initialized the</span>
+ <span class="c1"># first time something gets read from the table. It is not required</span>
+ <span class="c1"># for reading the field values in each row but could be useful for</span>
+ <span class="c1"># getting additional details.</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">schema</span> <span class="o">=</span> <span class="kc">None</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">use_legacy_sql</span> <span class="o">=</span> <span class="n">use_legacy_sql</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">flatten_results</span> <span class="o">=</span> <span class="n">flatten_results</span>
+
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">source</span><span class="o">.</span><span class="n">query</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="c1"># If table schema did not define a project we default to executing</span>
+ <span class="c1"># project.</span>
+ <span class="n">project_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">source</span><span class="o">.</span><span class="n">table_reference</span><span class="o">.</span><span class="n">projectId</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">project_id</span><span class="p">:</span>
+ <span class="n">project_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">executing_project</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">query</span> <span class="o">=</span> <span class="s1">'SELECT * FROM [</span><span class="si">%s</span><span class="s1">:</span><span class="si">%s</span><span class="s1">.</span><span class="si">%s</span><span class="s1">];'</span> <span class="o">%</span> <span class="p">(</span>
+ <span class="n">project_id</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">source</span><span class="o">.</span><span class="n">table_reference</span><span class="o">.</span><span class="n">datasetId</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">source</span><span class="o">.</span><span class="n">table_reference</span><span class="o">.</span><span class="n">tableId</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">query</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">source</span><span class="o">.</span><span class="n">query</span>
+
+ <span class="k">def</span> <span class="nf">_get_source_table_location</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">tr</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">source</span><span class="o">.</span><span class="n">table_reference</span>
+ <span class="k">if</span> <span class="n">tr</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="c1"># TODO: implement location retrieval for query sources</span>
+ <span class="k">return</span>
+
+ <span class="k">if</span> <span class="n">tr</span><span class="o">.</span><span class="n">projectId</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="n">source_project_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">executing_project</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">source_project_id</span> <span class="o">=</span> <span class="n">tr</span><span class="o">.</span><span class="n">projectId</span>
+
+ <span class="n">source_dataset_id</span> <span class="o">=</span> <span class="n">tr</span><span class="o">.</span><span class="n">datasetId</span>
+ <span class="n">source_table_id</span> <span class="o">=</span> <span class="n">tr</span><span class="o">.</span><span class="n">tableId</span>
+ <span class="n">source_location</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">client</span><span class="o">.</span><span class="n">get_table_location</span><span class="p">(</span>
+ <span class="n">source_project_id</span><span class="p">,</span> <span class="n">source_dataset_id</span><span class="p">,</span> <span class="n">source_table_id</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">source_location</span>
+
+ <span class="k">def</span> <span class="nf">__enter__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">client</span> <span class="o">=</span> <span class="n">BigQueryWrapper</span><span class="p">(</span><span class="n">client</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">test_bigquery_client</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">client</span><span class="o">.</span><span class="n">create_temporary_dataset</span><span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">executing_project</span><span class="p">,</span> <span class="n">location</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_get_source_table_location</span><span class="p">())</span>
+ <span class="k">return</span> <span class="bp">self</span>
+
+ <span class="k">def</span> <span class="nf">__exit__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">exception_type</span><span class="p">,</span> <span class="n">exception_value</span><span class="p">,</span> <span class="n">traceback</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">client</span><span class="o">.</span><span class="n">clean_up_temporary_dataset</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">executing_project</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__iter__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">rows</span><span class="p">,</span> <span class="n">schema</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">client</span><span class="o">.</span><span class="n">run_query</span><span class="p">(</span>
+ <span class="n">project_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">executing_project</span><span class="p">,</span> <span class="n">query</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">query</span><span class="p">,</span>
+ <span class="n">use_legacy_sql</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">use_legacy_sql</span><span class="p">,</span>
+ <span class="n">flatten_results</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">flatten_results</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">schema</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">schema</span> <span class="o">=</span> <span class="n">schema</span>
+ <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">rows</span><span class="p">:</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">row_as_dict</span><span class="p">:</span>
+ <span class="k">yield</span> <span class="bp">self</span><span class="o">.</span><span class="n">client</span><span class="o">.</span><span class="n">convert_row_to_dict</span><span class="p">(</span><span class="n">row</span><span class="p">,</span> <span class="n">schema</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">yield</span> <span class="n">row</span>
+
+
+<span class="k">class</span> <span class="nc">BigQueryWriter</span><span class="p">(</span><span class="n">dataflow_io</span><span class="o">.</span><span class="n">NativeSinkWriter</span><span class="p">):</span>
+ <span class="sd">"""The sink writer for a BigQuerySink."""</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sink</span><span class="p">,</span> <span class="n">test_bigquery_client</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">buffer_size</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">sink</span> <span class="o">=</span> <span class="n">sink</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">test_bigquery_client</span> <span class="o">=</span> <span class="n">test_bigquery_client</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">row_as_dict</span> <span class="o">=</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sink</span><span class="o">.</span><span class="n">coder</span><span class="p">,</span> <span class="n">RowAsDictJsonCoder</span><span class="p">)</span>
+ <span class="c1"># Buffer used to batch written rows so we reduce communication with the</span>
+ <span class="c1"># BigQuery service.</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">rows_buffer</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">rows_buffer_flush_threshold</span> <span class="o">=</span> <span class="n">buffer_size</span> <span class="ow">or</span> <span class="mi">1000</span>
+ <span class="c1"># Figure out the project, dataset, and table used for the sink.</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sink</span><span class="o">.</span><span class="n">table_reference</span><span class="o">.</span><span class="n">projectId</span>
+
+ <span class="c1"># If table schema did not define a project we default to executing project.</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">sink</span><span class="p">,</span> <span class="s1">'pipeline_options'</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="n">sink</span><span class="o">.</span><span class="n">pipeline_options</span><span class="o">.</span><span class="n">view_as</span><span class="p">(</span><span class="n">GoogleCloudOptions</span><span class="p">)</span><span class="o">.</span><span class="n">project</span><span class="p">)</span>
+
+ <span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">dataset_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sink</span><span class="o">.</span><span class="n">table_reference</span><span class="o">.</span><span class="n">datasetId</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">table_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sink</span><span class="o">.</span><span class="n">table_reference</span><span class="o">.</span><span class="n">tableId</span>
+
+ <span class="k">def</span> <span class="nf">_flush_rows_buffer</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">rows_buffer</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Writing </span><span class="si">%d</span><span class="s1"> rows to </span><span class="si">%s</span><span class="s1">:</span><span class="si">%s</span><span class="s1">.</span><span class="si">%s</span><span class="s1"> table.'</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">. [...]
+ <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">dataset_id</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">table_id</span><span class="p">)</span>
+ <span class="n">passed</span><span class="p">,</span> <span class="n">errors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">client</span><span class="o">.</span><span class="n">insert_rows</span><span class="p">(</span>
+ <span class="n">project_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span> <span class="n">dataset_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dataset_id</span><span class="p">,</span>
+ <span class="n">table_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">table_id</span><span class="p">,</span> <span class="n">rows</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">rows_buffer</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">rows_buffer</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">passed</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s1">'Could not successfully insert rows to BigQuery'</span>
+ <span class="s1">' table [</span><span class="si">%s</span><span class="s1">:</span><span class="si">%s</span><span class="s1">.</span><span class="si">%s</span><span class="s1">]. Errors: </span><span class="si">%s</span><span class="s1">'</span><span class="o">%</span>
+ <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">dataset_id</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">table_id</span><span class="p">,</span> <span class="n">errors</span><span class="p">))</span>
+
+ <span class="k">def</span> <span class="nf">__enter__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">client</span> <span class="o">=</span> <span class="n">BigQueryWrapper</span><span class="p">(</span><span class="n">client</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">test_bigquery_client</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">client</span><span class="o">.</span><span class="n">get_or_create_table</span><span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">dataset_id</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">table_id</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">sink</span><span class="o">.</span><span class="n">table_schema</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">sink</span><span class="o">.</span><span class="n">create_disposition</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">sink</span><span class="o">.</span><span class="n">write_disposition</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">self</span>
+
+ <span class="k">def</span> <span class="nf">__exit__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">exception_type</span><span class="p">,</span> <span class="n">exception_value</span><span class="p">,</span> <span class="n">traceback</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_flush_rows_buffer</span><span class="p">()</span>
+
+ <span class="k">def</span> <span class="nf">Write</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">row</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">rows_buffer</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">row</span><span class="p">)</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">rows_buffer</span><span class="p">)</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">rows_buffer_flush_threshold</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_flush_rows_buffer</span><span class="p">()</span>
+
+
+<span class="c1"># -----------------------------------------------------------------------------</span>
+<span class="c1"># BigQueryWrapper.</span>
+
+
+<span class="k">class</span> <span class="nc">BigQueryWrapper</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">"""BigQuery client wrapper with utilities for querying.</span>
... 120216 lines suppressed ...
--
To stop receiving notification emails like this one, please contact
mergebot-role@apache.org.