You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by dh...@apache.org on 2016/07/18 16:53:27 UTC
[2/2] incubator-beam git commit: Clarifies that 'TextFileSource' only
supports UTF-8 and ASCII encodings.
Clarifies that 'TextFileSource' only supports UTF-8 and ASCII encodings.
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/86a3ec73
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/86a3ec73
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/86a3ec73
Branch: refs/heads/python-sdk
Commit: 86a3ec73224dd8a8d26606b0379bfa30c62feb4a
Parents: 7e0497b
Author: Chamikara Jayalath <ch...@google.com>
Authored: Fri Jul 15 14:12:16 2016 -0700
Committer: Dan Halperin <dh...@google.com>
Committed: Mon Jul 18 09:53:22 2016 -0700
----------------------------------------------------------------------
sdks/python/apache_beam/io/fileio.py | 4 ++++
1 file changed, 4 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/86a3ec73/sdks/python/apache_beam/io/fileio.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/fileio.py b/sdks/python/apache_beam/io/fileio.py
index 8e657d7..3afaae8 100644
--- a/sdks/python/apache_beam/io/fileio.py
+++ b/sdks/python/apache_beam/io/fileio.py
@@ -72,6 +72,9 @@ class TextFileSource(iobase.NativeSource):
Parses a text file as newline-delimited elements, by default assuming
UTF-8 encoding.
+
+ This implementation has only been tested to read text encoded using UTF-8 or
+ ASCII. This has not been tested for other encodings such as UTF-16 or UTF-32.
"""
def __init__(self, file_path, start_offset=None, end_offset=None,
@@ -91,6 +94,7 @@ class TextFileSource(iobase.NativeSource):
is 'AUTO'.
strip_trailing_newlines: Indicates whether this source should remove
the newline char in each line it reads before decoding that line.
+ This feature only works for ASCII and UTF-8 encoded input.
coder: Coder used to decode each line.
Raises: