You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by je...@apache.org on 2014/09/03 16:00:11 UTC

[1/5] git commit: [#7628] ticket:646 Fix trove update to handle duplicates

Repository: allura
Updated Branches:
  refs/heads/je/42cc_7628 [created] e15c41576


[#7628] ticket:646 Fix trove update to handle duplicates

It helps find each one of them.


Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/862c7255
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/862c7255
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/862c7255

Branch: refs/heads/je/42cc_7628
Commit: 862c725565937bb188cd31d09e00a998ae74a45a
Parents: e790049
Author: Igor Bondarenko <je...@gmail.com>
Authored: Wed Sep 3 13:11:42 2014 +0300
Committer: Igor Bondarenko <je...@gmail.com>
Committed: Wed Sep 3 13:11:42 2014 +0300

----------------------------------------------------------------------
 Allura/allura/command/create_trove_categories.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/allura/blob/862c7255/Allura/allura/command/create_trove_categories.py
----------------------------------------------------------------------
diff --git a/Allura/allura/command/create_trove_categories.py b/Allura/allura/command/create_trove_categories.py
index e520214..0501f49 100644
--- a/Allura/allura/command/create_trove_categories.py
+++ b/Allura/allura/command/create_trove_categories.py
@@ -38,7 +38,7 @@ class CreateTroveCategoriesCommand(base.Command):
 
     # NOTE: order is important
     # To add a new migration, append its name to the following list,
-    # and cretate method m__<migration_name>
+    # and create method m__<migration_name>
     migrations = [
         'add_agpl_and_lppl',
         'sync',
@@ -58,12 +58,13 @@ class CreateTroveCategoriesCommand(base.Command):
         M.TroveCategory(**data)
 
     def update_trove_cat(self, trove_cat_id, attr_dict):
-        t = M.TroveCategory.query.get(trove_cat_id=trove_cat_id)
-        if not t:
+        ts = M.TroveCategory.query.find(dict(trove_cat_id=trove_cat_id))
+        if ts.count() < 1:
             sys.exit("Couldn't find TroveCategory with trove_cat_id=%s" %
                      trove_cat_id)
-        for k, v in attr_dict.iteritems():
-            setattr(t, k, v)
+        for t in ts:
+            for k, v in attr_dict.iteritems():
+                setattr(t, k, v)
 
     # patching to avoid a *lot* of event hooks firing, and taking a long long time
     @patch.object(M.project.TroveCategoryMapperExtension, 'after_insert', Mock())


[5/5] git commit: [#7628] ticket:646 Fix agpl duplicates discovery

Posted by je...@apache.org.
[#7628] ticket:646 Fix agpl duplicates discovery


Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/e15c4157
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/e15c4157
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/e15c4157

Branch: refs/heads/je/42cc_7628
Commit: e15c415761bb09c7ee12758f578815dd26ca119a
Parents: 64efd59
Author: Igor Bondarenko <je...@gmail.com>
Authored: Wed Sep 3 16:42:08 2014 +0300
Committer: Igor Bondarenko <je...@gmail.com>
Committed: Wed Sep 3 16:42:08 2014 +0300

----------------------------------------------------------------------
 Allura/allura/scripts/remove_duplicate_troves.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/allura/blob/e15c4157/Allura/allura/scripts/remove_duplicate_troves.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/remove_duplicate_troves.py b/Allura/allura/scripts/remove_duplicate_troves.py
index 5249498..7866b2e 100644
--- a/Allura/allura/scripts/remove_duplicate_troves.py
+++ b/Allura/allura/scripts/remove_duplicate_troves.py
@@ -82,10 +82,13 @@ class RemoveDuplicateTroves(ScriptTask):
 
     @classmethod
     def _find_duplicates(cls):
-        # agpl is present twice with different cat_id
-        # (update in creation command updated only one of duplicates),
-        # so code below will not catch it
-        dups = M.TroveCategory.query.find({'shortname': 'agpl'}).all()
+        dups = []
+        agpl = M.TroveCategory.query.find({'shortname': 'agpl'}).all()
+        if len(agpl) > 1:
+            # agpl is present twice with different cat_id
+            # (update in creation command updated only one of duplicates),
+            # so code below will not catch it
+            dups.extend(agpl)
         for cat in M.TroveCategory.query.find():
             if M.TroveCategory.query.find({
                 'shortname': cat.shortname,


[4/5] git commit: [#7628] ticket:646 Reassign categories if different projects use different duplicates

Posted by je...@apache.org.
[#7628] ticket:646 Reassign categories if different projects use different duplicates


Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/64efd59e
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/64efd59e
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/64efd59e

Branch: refs/heads/je/42cc_7628
Commit: 64efd59e036b1c90e4c11bf641cece04db661fae
Parents: 487de12
Author: Igor Bondarenko <je...@gmail.com>
Authored: Wed Sep 3 16:40:13 2014 +0300
Committer: Igor Bondarenko <je...@gmail.com>
Committed: Wed Sep 3 16:40:13 2014 +0300

----------------------------------------------------------------------
 .../allura/scripts/remove_duplicate_troves.py   | 40 ++++++++++++++++----
 1 file changed, 32 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/allura/blob/64efd59e/Allura/allura/scripts/remove_duplicate_troves.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/remove_duplicate_troves.py b/Allura/allura/scripts/remove_duplicate_troves.py
index aab0bf1..5249498 100644
--- a/Allura/allura/scripts/remove_duplicate_troves.py
+++ b/Allura/allura/scripts/remove_duplicate_troves.py
@@ -31,6 +31,18 @@ log = logging.getLogger(__name__)
 
 
 class RemoveDuplicateTroves(ScriptTask):
+    
+    trove_types = [
+        'trove_root_database',
+        'trove_developmentstatus',
+        'trove_audience',
+        'trove_license',
+        'trove_os',
+        'trove_language',
+        'trove_topic',
+        'trove_natlanguage',
+        'trove_environment',
+    ]
 
     @classmethod
     def execute(cls, options):
@@ -49,19 +61,31 @@ class RemoveDuplicateTroves(ScriptTask):
             priority = [p[0] for p in priority]
             live, kill = priority[0], priority[1:]
             log.info('%s will live %s will die', live, kill)
-            if sum([len(projects_with_category[_id]) for _id in kill]) == 0:
-                # Duplicates are used nowhere
-                log.info('Removing categories %s', kill)
-                if not options.dry_run:
-                    M.TroveCategory.query.remove({'_id': {'$in': kill}})
-            else:
+            if sum([len(projects_with_category[_id]) for _id in kill]) > 0:
                 # Duplicates are used somewhere, need to reassign for all projects that use them
-                pass
+                projects = []
+                ids_to_kill = set(kill)
+                for p in [projects_with_category[_id] for _id in kill]:
+                    projects.extend(p)
+                for p in projects:
+                    for tt in cls.trove_types:
+                        _ids = ids_to_kill.intersection(getattr(p, tt))
+                        for _id in _ids:
+                            log.info('Removing %s from %s.%s and adding %s instead', _id, p.shortname, tt, live)
+                            if not options.dry_run:
+                                getattr(p, tt).remove(_id)
+                                getattr(p, tt).append(live)
+            log.info('Removing categories %s', kill)
+            if not options.dry_run:
+                M.TroveCategory.query.remove({'_id': {'$in': kill}})
             ThreadLocalORMSession.flush_all()
 
     @classmethod
     def _find_duplicates(cls):
-        dups = []
+        # agpl is present twice with different cat_id
+        # (update in creation command updated only one of duplicates),
+        # so code below will not catch it
+        dups = M.TroveCategory.query.find({'shortname': 'agpl'}).all()
         for cat in M.TroveCategory.query.find():
             if M.TroveCategory.query.find({
                 'shortname': cat.shortname,


[2/5] git commit: [#7628] ticket:646 Remove duplicates from command

Posted by je...@apache.org.
[#7628] ticket:646 Remove duplicates from command


Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/da026b83
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/da026b83
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/da026b83

Branch: refs/heads/je/42cc_7628
Commit: da026b8388e023aeb283ec99aff03d198db0a99f
Parents: 862c725
Author: Igor Bondarenko <je...@gmail.com>
Authored: Wed Sep 3 13:20:16 2014 +0300
Committer: Igor Bondarenko <je...@gmail.com>
Committed: Wed Sep 3 13:20:16 2014 +0300

----------------------------------------------------------------------
 .../allura/command/create_trove_categories.py   | 69 --------------------
 1 file changed, 69 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/allura/blob/da026b83/Allura/allura/command/create_trove_categories.py
----------------------------------------------------------------------
diff --git a/Allura/allura/command/create_trove_categories.py b/Allura/allura/command/create_trove_categories.py
index 0501f49..2f2fb9f 100644
--- a/Allura/allura/command/create_trove_categories.py
+++ b/Allura/allura/command/create_trove_categories.py
@@ -40,7 +40,6 @@ class CreateTroveCategoriesCommand(base.Command):
     # To add a new migration, append its name to the following list,
     # and create method m__<migration_name>
     migrations = [
-        'add_agpl_and_lppl',
         'sync',
         'set_parent_only',
         'add_license',
@@ -1458,68 +1457,6 @@ class CreateTroveCategoriesCommand(base.Command):
             getattr(self, 'm__' + name)()
             session(M.TroveCategory).flush()
 
-    def m__add_agpl_and_lppl(self):
-        M.TroveCategory(trove_cat_id=670,
-                        trove_parent_id=14,
-                        shortname="agpl",
-                        fullname="Affero GNU Public License",
-                        fullpath="License :: OSI-Approved Open Source :: Affero GNU Public License")
-        M.TroveCategory(trove_cat_id=862,
-                        trove_parent_id=14,
-                        shortname="lppl",
-                        fullname="LaTeX Project Public License",
-                        fullpath="License :: OSI-Approved Open Source :: LaTeX Project Public License")
-        M.TroveCategory(trove_cat_id=655,
-                        trove_parent_id=432,
-                        shortname="win64",
-                        fullname="64-bit MS Windows",
-                        fullpath="Operating System :: Grouping and Descriptive Categories :: 64-bit MS Windows")
-        M.TroveCategory(trove_cat_id=657,
-                        trove_parent_id=418,
-                        shortname="vista",
-                        fullname="Vista",
-                        fullpath="Operating System :: Modern (Vendor-Supported) Desktop Operating Systems :: Vista")
-        M.TroveCategory(trove_cat_id=851,
-                        trove_parent_id=418,
-                        shortname="win7",
-                        fullname="Windows 7",
-                        fullpath="Operating System :: Modern (Vendor-Supported) Desktop Operating Systems :: Windows 7")
-        M.TroveCategory(trove_cat_id=728,
-                        trove_parent_id=315,
-                        shortname="android",
-                        fullname="Android",
-                        fullpath="Operating System :: Handheld/Embedded Operating Systems :: Android")
-        M.TroveCategory(trove_cat_id=780,
-                        trove_parent_id=315,
-                        shortname="ios",
-                        fullname="Apple iPhone",
-                        fullpath="Operating System :: Handheld/Embedded Operating Systems :: Apple iPhone")
-        M.TroveCategory(trove_cat_id=863,
-                        trove_parent_id=534,
-                        shortname="architects",
-                        fullname="Architects",
-                        fullpath="Intended Audience :: by End-User Class :: Architects")
-        M.TroveCategory(trove_cat_id=864,
-                        trove_parent_id=534,
-                        shortname="auditors",
-                        fullname="Auditors",
-                        fullpath="Intended Audience :: by End-User Class :: Auditors")
-        M.TroveCategory(trove_cat_id=865,
-                        trove_parent_id=534,
-                        shortname="testers",
-                        fullname="Testers",
-                        fullpath="Intended Audience :: by End-User Class :: Testers")
-        M.TroveCategory(trove_cat_id=866,
-                        trove_parent_id=534,
-                        shortname="secpros",
-                        fullname="Security Professionals",
-                        fullpath="Intended Audience :: by End-User Class :: Security Professionals")
-        M.TroveCategory(trove_cat_id=867,
-                        trove_parent_id=535,
-                        shortname="secindustry",
-                        fullname="Security",
-                        fullpath="Intended Audience :: by Industry or Sector :: Security")
-
     def m__sync(self):
         self.create_trove_cat(
             (639, 14, "cpal", "Common Public Attribution License 1.0 (CPAL)",
@@ -1605,12 +1542,6 @@ class CreateTroveCategoriesCommand(base.Command):
             (678, 14, "boostlicense", "Boost Software License (BSL1.0)",
              "License :: OSI-Approved Open Source :: Boost Software License (BSL1.0)"))
         self.create_trove_cat(
-            (679, 14, "gplv3", "GNU General Public License version 3.0 (GPLv3)",
-             "License :: OSI-Approved Open Source :: GNU General Public License version 3.0 (GPLv3)"))
-        self.create_trove_cat(
-            (680, 14, "lgplv3", "GNU Library or ""Lesser"" General Public License version 3.0 (LGPLv3)",
-             "License :: OSI-Approved Open Source :: GNU Library or ""Lesser"" General Public License version 3.0 (LGPLv3)"))
-        self.create_trove_cat(
             (681, 14, "isclicense", "ISC License", "License :: OSI-Approved Open Source :: ISC License"))
         self.create_trove_cat((682, 14, "multicslicense", "Multics License",
                               "License :: OSI-Approved Open Source :: Multics License"))


[3/5] git commit: [#7628] ticket:646 Command to remove duplicate trove categories

Posted by je...@apache.org.
[#7628] ticket:646 Command to remove duplicate trove categories


Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/487de12f
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/487de12f
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/487de12f

Branch: refs/heads/je/42cc_7628
Commit: 487de12fbded799c03b92bfb7e525c0233707418
Parents: da026b8
Author: Igor Bondarenko <je...@gmail.com>
Authored: Wed Sep 3 16:14:43 2014 +0300
Committer: Igor Bondarenko <je...@gmail.com>
Committed: Wed Sep 3 16:14:43 2014 +0300

----------------------------------------------------------------------
 .../allura/scripts/remove_duplicate_troves.py   | 103 +++++++++++++++++++
 1 file changed, 103 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/allura/blob/487de12f/Allura/allura/scripts/remove_duplicate_troves.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/remove_duplicate_troves.py b/Allura/allura/scripts/remove_duplicate_troves.py
new file mode 100644
index 0000000..aab0bf1
--- /dev/null
+++ b/Allura/allura/scripts/remove_duplicate_troves.py
@@ -0,0 +1,103 @@
+#       Licensed to the Apache Software Foundation (ASF) under one
+#       or more contributor license agreements.  See the NOTICE file
+#       distributed with this work for additional information
+#       regarding copyright ownership.  The ASF licenses this file
+#       to you under the Apache License, Version 2.0 (the
+#       "License"); you may not use this file except in compliance
+#       with the License.  You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#       Unless required by applicable law or agreed to in writing,
+#       software distributed under the License is distributed on an
+#       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#       KIND, either express or implied.  See the License for the
+#       specific language governing permissions and limitations
+#       under the License.
+
+import argparse
+import logging
+from itertools import groupby
+from collections import defaultdict
+from operator import itemgetter
+
+from ming.odm import ThreadLocalORMSession
+
+from allura.scripts import ScriptTask
+from allura import model as M
+
+
+log = logging.getLogger(__name__)
+
+
+class RemoveDuplicateTroves(ScriptTask):
+
+    @classmethod
+    def execute(cls, options):
+        duplicates = cls._find_duplicates()
+        log.info('Found %s duplicate categories: %s', len(duplicates), duplicates.keys())
+        for name, dups in duplicates.iteritems():
+            projects_with_category = {}
+            for dup in dups:
+                projects = cls._projects_with_category(dup._id)
+                projects_with_category[dup._id] = projects
+            log.info('Following projects are using category %s:', name)
+            for _id, ps in projects_with_category.iteritems():
+                log.info('  with id %s: %s', _id, [p.shortname for p in ps])
+            priority = [(i, len(ps)) for i, ps in projects_with_category.items()]
+            priority = sorted(priority, key=itemgetter(1), reverse=True)
+            priority = [p[0] for p in priority]
+            live, kill = priority[0], priority[1:]
+            log.info('%s will live %s will die', live, kill)
+            if sum([len(projects_with_category[_id]) for _id in kill]) == 0:
+                # Duplicates are used nowhere
+                log.info('Removing categories %s', kill)
+                if not options.dry_run:
+                    M.TroveCategory.query.remove({'_id': {'$in': kill}})
+            else:
+                # Duplicates are used somewhere, need to reassign for all projects that use them
+                pass
+            ThreadLocalORMSession.flush_all()
+
+    @classmethod
+    def _find_duplicates(cls):
+        dups = []
+        for cat in M.TroveCategory.query.find():
+            if M.TroveCategory.query.find({
+                'shortname': cat.shortname,
+                'trove_cat_id': cat.trove_cat_id,
+                'trove_parent_id': cat.trove_parent_id,
+                'fullname': cat.fullname,
+                'fullpath': cat.fullpath,
+            }).count() > 1:
+                dups.append(cat)
+        result = defaultdict(list)
+        for k, v in groupby(dups, lambda x: x.shortname):
+            result[k].extend(list(v))
+        return result
+
+    @classmethod
+    def _projects_with_category(cls, _id):
+        p = M.Project.query.find({'$or': [
+            {'trove_root_database': _id},
+            {'trove_developmentstatus': _id},
+            {'trove_audience': _id},
+            {'trove_license': _id},
+            {'trove_os': _id},
+            {'trove_language': _id},
+            {'trove_topic': _id},
+            {'trove_natlanguage': _id},
+            {'trove_environment':_id},
+        ]})
+        return p.all()
+
+    @classmethod
+    def parser(cls):
+        parser = argparse.ArgumentParser(description='Remove duplicate troves')
+        parser.add_argument('--dry-run', action='store_true', dest='dry_run',
+                            default=False, help='Print what will be changed but do not change anything')
+        return parser
+
+
+if __name__ == '__main__':
+    RemoveDuplicateTroves.main()