You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by pa...@apache.org on 2021/03/18 05:30:42 UTC

[beam] branch master updated: [BEAM-11979] Ignore not serializable filter fields in python MongoDBIO display data

This is an automated email from the ASF dual-hosted git repository.

pabloem pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new d6436b3  [BEAM-11979] Ignore not serializable filter fields in python MongoDBIO display data
     new 9791ef9  Merge pull request #14237 from [BEAM-11979] Ignore not serializable filter fields in python MongoDBI…
d6436b3 is described below

commit d6436b340a2ac06e62ed4d5f2bb6c92f294e3997
Author: Yichi Zhang <zy...@google.com>
AuthorDate: Mon Mar 15 12:40:21 2021 -0700

    [BEAM-11979] Ignore not serializable filter fields in python MongoDBIO display data
---
 sdks/python/apache_beam/io/mongodbio.py      | 28 ++-----------------
 sdks/python/apache_beam/io/mongodbio_test.py | 42 ----------------------------
 2 files changed, 2 insertions(+), 68 deletions(-)

diff --git a/sdks/python/apache_beam/io/mongodbio.py b/sdks/python/apache_beam/io/mongodbio.py
index 61d8521..65b0de9 100644
--- a/sdks/python/apache_beam/io/mongodbio.py
+++ b/sdks/python/apache_beam/io/mongodbio.py
@@ -74,7 +74,6 @@ import json
 import logging
 import math
 import struct
-import urllib
 
 import apache_beam as beam
 from apache_beam.io import iobase
@@ -261,12 +260,11 @@ class _BoundedMongoSource(iobase.BoundedSource):
 
   def display_data(self):
     res = super(_BoundedMongoSource, self).display_data()
-    res['uri'] = _mask_uri_password(self.uri)
     res['database'] = self.db
     res['collection'] = self.coll
-    res['filter'] = json.dumps(self.filter)
+    res['filter'] = json.dumps(
+        self.filter, default=lambda x: 'not_serializable(%s)' % str(x))
     res['projection'] = str(self.projection)
-    res['mongo_client_spec'] = json.dumps(_mask_spec_password(self.spec))
     res['bucket_auto'] = self.bucket_auto
     return res
 
@@ -565,10 +563,8 @@ class _WriteMongoFn(DoFn):
 
   def display_data(self):
     res = super(_WriteMongoFn, self).display_data()
-    res['uri'] = _mask_uri_password(self.uri)
     res['database'] = self.db
     res['collection'] = self.coll
-    res['mongo_client_params'] = json.dumps(_mask_spec_password(self.spec))
     res['batch_size'] = self.batch_size
     return res
 
@@ -612,23 +608,3 @@ class _MongoSink(object):
   def __exit__(self, exc_type, exc_val, exc_tb):
     if self.client is not None:
       self.client.close()
-
-
-def _mask_uri_password(uri):
-  # Masks password in uri if present
-  if uri:
-    components = urllib.parse.urlsplit(uri)
-    if components.password:
-      replaced = components._replace(
-          netloc='{}:{}@{}'.format(
-              components.username, '******', components.hostname))
-      uri = replaced.geturl()
-  return uri
-
-
-def _mask_spec_password(spec):
-  # Masks password in spec if present
-  if spec and 'password' in spec:
-    spec = spec.copy()
-    spec['password'] = '******'
-  return spec
diff --git a/sdks/python/apache_beam/io/mongodbio_test.py b/sdks/python/apache_beam/io/mongodbio_test.py
index 353fafb..5ee4518 100644
--- a/sdks/python/apache_beam/io/mongodbio_test.py
+++ b/sdks/python/apache_beam/io/mongodbio_test.py
@@ -430,32 +430,9 @@ class MongoSourceTest(unittest.TestCase):
 
   def test_display_data(self):
     data = self.mongo_source.display_data()
-    self.assertTrue('uri' in data)
     self.assertTrue('database' in data)
     self.assertTrue('collection' in data)
 
-  def test_display_data_mask_password(self):
-    # Uri without password
-    data = self.mongo_source.display_data()
-    self.assertTrue('uri' in data)
-    self.assertTrue(data['uri'] == 'mongodb://test')
-    # Password is masked in the uri if present
-    mongo_source = _BoundedMongoSource(
-        'mongodb+srv://user:password@test.mongodb.net/testdb',
-        'testdb',
-        'testcoll',
-        extra_client_params={
-            'user': 'user', 'password': 'password'
-        })
-    data = mongo_source.display_data()
-    self.assertTrue('uri' in data)
-    self.assertTrue(
-        data['uri'] == 'mongodb+srv://user:******@test.mongodb.net/testdb')
-    # Password is masked in the client spec if present
-    self.assertTrue('mongo_client_spec' in data)
-    self.assertTrue(
-        data['mongo_client_spec'] == '{"user": "user", "password": "******"}')
-
 
 @parameterized_class(('bucket_auto', ), [(False, ), (True, )])
 class ReadFromMongoDBTest(unittest.TestCase):
@@ -513,25 +490,6 @@ class WriteMongoFnTest(unittest.TestCase):
     data = _WriteMongoFn(batch_size=10).display_data()
     self.assertEqual(10, data['batch_size'])
 
-  def test_display_data_mask_password(self):
-    # Uri without password
-    data = _WriteMongoFn(uri='mongodb://test').display_data()
-    self.assertTrue('uri' in data)
-    self.assertTrue(data['uri'] == 'mongodb://test')
-    # Password is masked in the uri if present
-    data = _WriteMongoFn(
-        uri='mongodb+srv://user:password@test.mongodb.net/testdb',
-        extra_params={
-            'user': 'user', 'password': 'password'
-        }).display_data()
-    self.assertTrue('uri' in data)
-    self.assertTrue(
-        data['uri'] == 'mongodb+srv://user:******@test.mongodb.net/testdb')
-    # Password is masked in the client spec if present
-    self.assertTrue('mongo_client_params' in data)
-    self.assertTrue(
-        data['mongo_client_params'] == '{"user": "user", "password": "******"}')
-
 
 class MongoSinkTest(unittest.TestCase):
   @mock.patch('apache_beam.io.mongodbio.MongoClient')