You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by pa...@apache.org on 2021/03/18 05:30:42 UTC
[beam] branch master updated: [BEAM-11979] Ignore not serializable filter fields in python MongoDBIO display data
This is an automated email from the ASF dual-hosted git repository.
pabloem pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new d6436b3 [BEAM-11979] Ignore not serializable filter fields in python MongoDBIO display data
new 9791ef9 Merge pull request #14237 from [BEAM-11979] Ignore not serializable filter fields in python MongoDBI…
d6436b3 is described below
commit d6436b340a2ac06e62ed4d5f2bb6c92f294e3997
Author: Yichi Zhang <zy...@google.com>
AuthorDate: Mon Mar 15 12:40:21 2021 -0700
[BEAM-11979] Ignore not serializable filter fields in python MongoDBIO display data
---
sdks/python/apache_beam/io/mongodbio.py | 28 ++-----------------
sdks/python/apache_beam/io/mongodbio_test.py | 42 ----------------------------
2 files changed, 2 insertions(+), 68 deletions(-)
diff --git a/sdks/python/apache_beam/io/mongodbio.py b/sdks/python/apache_beam/io/mongodbio.py
index 61d8521..65b0de9 100644
--- a/sdks/python/apache_beam/io/mongodbio.py
+++ b/sdks/python/apache_beam/io/mongodbio.py
@@ -74,7 +74,6 @@ import json
import logging
import math
import struct
-import urllib
import apache_beam as beam
from apache_beam.io import iobase
@@ -261,12 +260,11 @@ class _BoundedMongoSource(iobase.BoundedSource):
def display_data(self):
res = super(_BoundedMongoSource, self).display_data()
- res['uri'] = _mask_uri_password(self.uri)
res['database'] = self.db
res['collection'] = self.coll
- res['filter'] = json.dumps(self.filter)
+ res['filter'] = json.dumps(
+ self.filter, default=lambda x: 'not_serializable(%s)' % str(x))
res['projection'] = str(self.projection)
- res['mongo_client_spec'] = json.dumps(_mask_spec_password(self.spec))
res['bucket_auto'] = self.bucket_auto
return res
@@ -565,10 +563,8 @@ class _WriteMongoFn(DoFn):
def display_data(self):
res = super(_WriteMongoFn, self).display_data()
- res['uri'] = _mask_uri_password(self.uri)
res['database'] = self.db
res['collection'] = self.coll
- res['mongo_client_params'] = json.dumps(_mask_spec_password(self.spec))
res['batch_size'] = self.batch_size
return res
@@ -612,23 +608,3 @@ class _MongoSink(object):
def __exit__(self, exc_type, exc_val, exc_tb):
if self.client is not None:
self.client.close()
-
-
-def _mask_uri_password(uri):
- # Masks password in uri if present
- if uri:
- components = urllib.parse.urlsplit(uri)
- if components.password:
- replaced = components._replace(
- netloc='{}:{}@{}'.format(
- components.username, '******', components.hostname))
- uri = replaced.geturl()
- return uri
-
-
-def _mask_spec_password(spec):
- # Masks password in spec if present
- if spec and 'password' in spec:
- spec = spec.copy()
- spec['password'] = '******'
- return spec
diff --git a/sdks/python/apache_beam/io/mongodbio_test.py b/sdks/python/apache_beam/io/mongodbio_test.py
index 353fafb..5ee4518 100644
--- a/sdks/python/apache_beam/io/mongodbio_test.py
+++ b/sdks/python/apache_beam/io/mongodbio_test.py
@@ -430,32 +430,9 @@ class MongoSourceTest(unittest.TestCase):
def test_display_data(self):
data = self.mongo_source.display_data()
- self.assertTrue('uri' in data)
self.assertTrue('database' in data)
self.assertTrue('collection' in data)
- def test_display_data_mask_password(self):
- # Uri without password
- data = self.mongo_source.display_data()
- self.assertTrue('uri' in data)
- self.assertTrue(data['uri'] == 'mongodb://test')
- # Password is masked in the uri if present
- mongo_source = _BoundedMongoSource(
- 'mongodb+srv://user:password@test.mongodb.net/testdb',
- 'testdb',
- 'testcoll',
- extra_client_params={
- 'user': 'user', 'password': 'password'
- })
- data = mongo_source.display_data()
- self.assertTrue('uri' in data)
- self.assertTrue(
- data['uri'] == 'mongodb+srv://user:******@test.mongodb.net/testdb')
- # Password is masked in the client spec if present
- self.assertTrue('mongo_client_spec' in data)
- self.assertTrue(
- data['mongo_client_spec'] == '{"user": "user", "password": "******"}')
-
@parameterized_class(('bucket_auto', ), [(False, ), (True, )])
class ReadFromMongoDBTest(unittest.TestCase):
@@ -513,25 +490,6 @@ class WriteMongoFnTest(unittest.TestCase):
data = _WriteMongoFn(batch_size=10).display_data()
self.assertEqual(10, data['batch_size'])
- def test_display_data_mask_password(self):
- # Uri without password
- data = _WriteMongoFn(uri='mongodb://test').display_data()
- self.assertTrue('uri' in data)
- self.assertTrue(data['uri'] == 'mongodb://test')
- # Password is masked in the uri if present
- data = _WriteMongoFn(
- uri='mongodb+srv://user:password@test.mongodb.net/testdb',
- extra_params={
- 'user': 'user', 'password': 'password'
- }).display_data()
- self.assertTrue('uri' in data)
- self.assertTrue(
- data['uri'] == 'mongodb+srv://user:******@test.mongodb.net/testdb')
- # Password is masked in the client spec if present
- self.assertTrue('mongo_client_params' in data)
- self.assertTrue(
- data['mongo_client_params'] == '{"user": "user", "password": "******"}')
-
class MongoSinkTest(unittest.TestCase):
@mock.patch('apache_beam.io.mongodbio.MongoClient')