You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@aurora.apache.org by ma...@apache.org on 2014/01/09 00:34:45 UTC

git commit: Fixing updater diff by converting TaskConfig to sorted json tuples.

Updated Branches:
  refs/heads/master 19e087509 -> c7fa8bb27


Fixing updater diff by converting TaskConfig to sorted json tuples.

Reviewed at https://reviews.apache.org/r/16311/


Project: http://git-wip-us.apache.org/repos/asf/incubator-aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-aurora/commit/c7fa8bb2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-aurora/tree/c7fa8bb2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-aurora/diff/c7fa8bb2

Branch: refs/heads/master
Commit: c7fa8bb2761799fb12ecc41407d3ed963459504e
Parents: 19e0875
Author: Maxim Khutornenko <mk...@twitter.com>
Authored: Wed Jan 8 15:31:39 2014 -0800
Committer: Maxim Khutornenko <mk...@twitter.com>
Committed: Wed Jan 8 15:31:39 2014 -0800

----------------------------------------------------------------------
 .../python/apache/aurora/client/api/updater.py  | 34 +++++++++++++++++---
 .../apache/aurora/client/api/test_updater.py    | 26 ++++++++++++++-
 2 files changed, 55 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/c7fa8bb2/src/main/python/apache/aurora/client/api/updater.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/api/updater.py b/src/main/python/apache/aurora/client/api/updater.py
index e364f26..c885340 100644
--- a/src/main/python/apache/aurora/client/api/updater.py
+++ b/src/main/python/apache/aurora/client/api/updater.py
@@ -1,3 +1,4 @@
+import json
 from collections import namedtuple
 from difflib import unified_diff
 
@@ -17,10 +18,13 @@ from gen.apache.aurora.ttypes import (
     ResponseCode,
     TaskQuery,
 )
+
 from .updater_util import FailureThreshold, UpdaterConfig
 from .instance_watcher import InstanceWatcher
 from .scheduler_client import SchedulerProxy
 
+from thrift.protocol import TJSONProtocol
+from thrift.TSerialization import serialize
 
 class Updater(object):
   """Update the instances of a job in batches."""
@@ -163,6 +167,31 @@ class Updater(object):
     if failed_instances:
       log.error('Rollback failed for instances: %s' % failed_instances)
 
+  def _hashable(self, element):
+    if isinstance(element, (list, set)):
+      return tuple(sorted(self._hashable(item) for item in element))
+    elif isinstance(element, dict):
+      return tuple(
+          sorted((self._hashable(key), self._hashable(value)) for (key, value) in element.items())
+      )
+    return element
+
+  def _thrift_to_json(self, config):
+    return json.loads(
+        serialize(config, protocol_factory=TJSONProtocol.TSimpleJSONProtocolFactory()))
+
+  def _diff_configs(self, from_config, to_config):
+    # Thrift objects do not correctly compare against each other due to the unhashable nature
+    # of python sets. That results in occasional diff failures with the following symptoms:
+    # - Sets are not equal even though their reprs are identical;
+    # - Items are reordered within thrift structs;
+    # - Items are reordered within sets;
+    # To overcome all the above, thrift objects are converted into JSON dicts to flatten out
+    # thrift type hierarchy. Next, JSONs are recursively converted into nested tuples to
+    # ensure proper ordering on compare.
+    return ''.join(unified_diff(repr(self._hashable(self._thrift_to_json(from_config))),
+                                repr(self._hashable(self._thrift_to_json(to_config)))))
+
   def _create_kill_add_lists(self, instance_ids, operation_configs):
     """Determines a particular action (kill or add) to use for every instance in instance_ids.
 
@@ -179,10 +208,7 @@ class Updater(object):
       to_config = operation_configs.to_config.get(instance_id)
 
       if from_config and to_config:
-        # Sort internal dicts before comparing to rule out differences due to hashing.
-        diff_output = ''.join(unified_diff(
-          str(sorted(from_config.__dict__.items(), key=lambda x: x[0])),
-          str(sorted(to_config.__dict__.items(), key=lambda x: x[0]))))
+        diff_output = self._diff_configs(from_config, to_config)
         if diff_output:
           log.debug('Task configuration changed for instance [%s]:\n%s' % (instance_id, diff_output))
           to_kill.append(instance_id)

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/c7fa8bb2/src/test/python/apache/aurora/client/api/test_updater.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/api/test_updater.py b/src/test/python/apache/aurora/client/api/test_updater.py
index 5843bb4..96371aa 100644
--- a/src/test/python/apache/aurora/client/api/test_updater.py
+++ b/src/test/python/apache/aurora/client/api/test_updater.py
@@ -18,6 +18,7 @@ from gen.apache.aurora.ttypes import (
   JobConfigValidation,
   JobKey,
   Identity,
+  LimitConstraint,
   Lock,
   LockKey,
   LockValidation,
@@ -29,13 +30,14 @@ from gen.apache.aurora.ttypes import (
   ScheduleStatusResult,
   ScheduledTask,
   TaskConfig,
+  TaskConstraint,
   TaskQuery,
+  ValueConstraint,
 )
 
 from mox import MockObject, Replay, Verify
 from pytest import raises
 
-
 # Debug output helper -> enables log.* in source.
 if 'UPDATER_DEBUG' in environ:
   from twitter.common import log
@@ -670,3 +672,25 @@ class UpdaterTest(TestCase):
     self.update_and_expect_ok()
     self.verify_mocks()
 
+  def test_diff_unordered_configs(self):
+    """Diff between two config objects with different repr but identical content works ok."""
+    from_config = self.make_task_configs()[0]
+    from_config.constraints = set([
+        Constraint(name='value', constraint=ValueConstraint(values=set(['1', '2']))),
+        Constraint(name='limit', constraint=TaskConstraint(limit=LimitConstraint(limit=int(10))))])
+    from_config.taskLinks = {'task1': 'link1', 'task2': 'link2'}
+    from_config.packages = set([
+      Package(name='n2', role='r2', version=4),
+      Package(role='r1', name='n1', version=1)])
+    from_config.executorConfig = ExecutorConfig(name='test', data='test data')
+    from_config.requestedPorts = set(['3424', '142', '45235'])
+
+    # Deepcopy() almost guarantees from_config != to_config due to a different sequence of
+    # dict insertions. That in turn generates unequal json objects. The ideal here would be to
+    # assert to_config != from_config but that would produce a flaky test as I have observed
+    # the opposite on rare occasions as the ordering is not stable between test runs.
+    to_config = deepcopy(from_config)
+
+    diff_result = self._updater._diff_configs(from_config, to_config)
+    assert diff_result == "", (
+      'diff result must be empty but was: %s' % diff_result)