You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by je...@apache.org on 2014/09/03 16:00:11 UTC
[1/5] git commit: [#7628] ticket:646 Fix trove update to handle duplicates
Repository: allura
Updated Branches:
refs/heads/je/42cc_7628 [created] e15c41576
[#7628] ticket:646 Fix trove update to handle duplicates
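update_trove_cat now looks up every TroveCategory with the given trove_cat_id and applies the update to each of them, so duplicates are no longer missed.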
Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/862c7255
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/862c7255
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/862c7255
Branch: refs/heads/je/42cc_7628
Commit: 862c725565937bb188cd31d09e00a998ae74a45a
Parents: e790049
Author: Igor Bondarenko <je...@gmail.com>
Authored: Wed Sep 3 13:11:42 2014 +0300
Committer: Igor Bondarenko <je...@gmail.com>
Committed: Wed Sep 3 13:11:42 2014 +0300
----------------------------------------------------------------------
Allura/allura/command/create_trove_categories.py | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/allura/blob/862c7255/Allura/allura/command/create_trove_categories.py
----------------------------------------------------------------------
diff --git a/Allura/allura/command/create_trove_categories.py b/Allura/allura/command/create_trove_categories.py
index e520214..0501f49 100644
--- a/Allura/allura/command/create_trove_categories.py
+++ b/Allura/allura/command/create_trove_categories.py
@@ -38,7 +38,7 @@ class CreateTroveCategoriesCommand(base.Command):
# NOTE: order is important
# To add new migration append it's name to following list,
- # and cretate method m__<migration_name>
+ # and create method m__<migration_name>
migrations = [
'add_agpl_and_lppl',
'sync',
@@ -58,12 +58,13 @@ class CreateTroveCategoriesCommand(base.Command):
M.TroveCategory(**data)
def update_trove_cat(self, trove_cat_id, attr_dict):
- t = M.TroveCategory.query.get(trove_cat_id=trove_cat_id)
- if not t:
+ ts = M.TroveCategory.query.find(dict(trove_cat_id=trove_cat_id))
+ if ts.count() < 1:
sys.exit("Couldn't find TroveCategory with trove_cat_id=%s" %
trove_cat_id)
- for k, v in attr_dict.iteritems():
- setattr(t, k, v)
+ for t in ts:
+ for k, v in attr_dict.iteritems():
+ setattr(t, k, v)
# patching to avoid a *lot* of event hooks firing, and taking a long long time
@patch.object(M.project.TroveCategoryMapperExtension, 'after_insert', Mock())
[5/5] git commit: [#7628] ticket:646 Fix agpl duplicates discovery
Posted by je...@apache.org.
[#7628] ticket:646 Fix agpl duplicates discovery
Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/e15c4157
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/e15c4157
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/e15c4157
Branch: refs/heads/je/42cc_7628
Commit: e15c415761bb09c7ee12758f578815dd26ca119a
Parents: 64efd59
Author: Igor Bondarenko <je...@gmail.com>
Authored: Wed Sep 3 16:42:08 2014 +0300
Committer: Igor Bondarenko <je...@gmail.com>
Committed: Wed Sep 3 16:42:08 2014 +0300
----------------------------------------------------------------------
Allura/allura/scripts/remove_duplicate_troves.py | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/allura/blob/e15c4157/Allura/allura/scripts/remove_duplicate_troves.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/remove_duplicate_troves.py b/Allura/allura/scripts/remove_duplicate_troves.py
index 5249498..7866b2e 100644
--- a/Allura/allura/scripts/remove_duplicate_troves.py
+++ b/Allura/allura/scripts/remove_duplicate_troves.py
@@ -82,10 +82,13 @@ class RemoveDuplicateTroves(ScriptTask):
@classmethod
def _find_duplicates(cls):
- # agpl is present twice with different cat_id
- # (update in creation command updated only one of duplicates),
- # so code below will not catch it
- dups = M.TroveCategory.query.find({'shortname': 'agpl'}).all()
+ dups = []
+ agpl = M.TroveCategory.query.find({'shortname': 'agpl'}).all()
+ if len(agpl) > 1:
+ # agpl is present twice with different cat_id
+ # (update in creation command updated only one of duplicates),
+ # so code below will not catch it
+ dups.extend(agpl)
for cat in M.TroveCategory.query.find():
if M.TroveCategory.query.find({
'shortname': cat.shortname,
[4/5] git commit: [#7628] ticket:646 Reassign categories if different projects use different duplicates
Posted by je...@apache.org.
[#7628] ticket:646 Reassign categories if different projects use different duplicates
Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/64efd59e
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/64efd59e
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/64efd59e
Branch: refs/heads/je/42cc_7628
Commit: 64efd59e036b1c90e4c11bf641cece04db661fae
Parents: 487de12
Author: Igor Bondarenko <je...@gmail.com>
Authored: Wed Sep 3 16:40:13 2014 +0300
Committer: Igor Bondarenko <je...@gmail.com>
Committed: Wed Sep 3 16:40:13 2014 +0300
----------------------------------------------------------------------
.../allura/scripts/remove_duplicate_troves.py | 40 ++++++++++++++++----
1 file changed, 32 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/allura/blob/64efd59e/Allura/allura/scripts/remove_duplicate_troves.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/remove_duplicate_troves.py b/Allura/allura/scripts/remove_duplicate_troves.py
index aab0bf1..5249498 100644
--- a/Allura/allura/scripts/remove_duplicate_troves.py
+++ b/Allura/allura/scripts/remove_duplicate_troves.py
@@ -31,6 +31,18 @@ log = logging.getLogger(__name__)
class RemoveDuplicateTroves(ScriptTask):
+
+ trove_types = [
+ 'trove_root_database',
+ 'trove_developmentstatus',
+ 'trove_audience',
+ 'trove_license',
+ 'trove_os',
+ 'trove_language',
+ 'trove_topic',
+ 'trove_natlanguage',
+ 'trove_environment',
+ ]
@classmethod
def execute(cls, options):
@@ -49,19 +61,31 @@ class RemoveDuplicateTroves(ScriptTask):
priority = [p[0] for p in priority]
live, kill = priority[0], priority[1:]
log.info('%s will live %s will die', live, kill)
- if sum([len(projects_with_category[_id]) for _id in kill]) == 0:
- # Duplicates are used nowhere
- log.info('Removing categories %s', kill)
- if not options.dry_run:
- M.TroveCategory.query.remove({'_id': {'$in': kill}})
- else:
+ if sum([len(projects_with_category[_id]) for _id in kill]) > 0:
# Duplicates are used somewhere, need to reasign for all projects that use them
- pass
+ projects = []
+ ids_to_kill = set(kill)
+ for p in [projects_with_category[_id] for _id in kill]:
+ projects.extend(p)
+ for p in projects:
+ for tt in cls.trove_types:
+ _ids = ids_to_kill.intersection(getattr(p, tt))
+ for _id in _ids:
+ log.info('Removing %s from %s.%s and adding %s instead', _id, p.shortname, tt, live)
+ if not options.dry_run:
+ getattr(p, tt).remove(_id)
+ getattr(p, tt).append(live)
+ log.info('Removing categories %s', kill)
+ if not options.dry_run:
+ M.TroveCategory.query.remove({'_id': {'$in': kill}})
ThreadLocalORMSession.flush_all()
@classmethod
def _find_duplicates(cls):
- dups = []
+ # agpl is present twice with different cat_id
+ # (update in creation command updated only one of duplicates),
+ # so code below will not catch it
+ dups = M.TroveCategory.query.find({'shortname': 'agpl'}).all()
for cat in M.TroveCategory.query.find():
if M.TroveCategory.query.find({
'shortname': cat.shortname,
[2/5] git commit: [#7628] ticket:646 Remove duplicates from command
Posted by je...@apache.org.
[#7628] ticket:646 Remove duplicates from command
Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/da026b83
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/da026b83
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/da026b83
Branch: refs/heads/je/42cc_7628
Commit: da026b8388e023aeb283ec99aff03d198db0a99f
Parents: 862c725
Author: Igor Bondarenko <je...@gmail.com>
Authored: Wed Sep 3 13:20:16 2014 +0300
Committer: Igor Bondarenko <je...@gmail.com>
Committed: Wed Sep 3 13:20:16 2014 +0300
----------------------------------------------------------------------
.../allura/command/create_trove_categories.py | 69 --------------------
1 file changed, 69 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/allura/blob/da026b83/Allura/allura/command/create_trove_categories.py
----------------------------------------------------------------------
diff --git a/Allura/allura/command/create_trove_categories.py b/Allura/allura/command/create_trove_categories.py
index 0501f49..2f2fb9f 100644
--- a/Allura/allura/command/create_trove_categories.py
+++ b/Allura/allura/command/create_trove_categories.py
@@ -40,7 +40,6 @@ class CreateTroveCategoriesCommand(base.Command):
# To add new migration append it's name to following list,
# and create method m__<migration_name>
migrations = [
- 'add_agpl_and_lppl',
'sync',
'set_parent_only',
'add_license',
@@ -1458,68 +1457,6 @@ class CreateTroveCategoriesCommand(base.Command):
getattr(self, 'm__' + name)()
session(M.TroveCategory).flush()
- def m__add_agpl_and_lppl(self):
- M.TroveCategory(trove_cat_id=670,
- trove_parent_id=14,
- shortname="agpl",
- fullname="Affero GNU Public License",
- fullpath="License :: OSI-Approved Open Source :: Affero GNU Public License")
- M.TroveCategory(trove_cat_id=862,
- trove_parent_id=14,
- shortname="lppl",
- fullname="LaTeX Project Public License",
- fullpath="License :: OSI-Approved Open Source :: LaTeX Project Public License")
- M.TroveCategory(trove_cat_id=655,
- trove_parent_id=432,
- shortname="win64",
- fullname="64-bit MS Windows",
- fullpath="Operating System :: Grouping and Descriptive Categories :: 64-bit MS Windows")
- M.TroveCategory(trove_cat_id=657,
- trove_parent_id=418,
- shortname="vista",
- fullname="Vista",
- fullpath="Operating System :: Modern (Vendor-Supported) Desktop Operating Systems :: Vista")
- M.TroveCategory(trove_cat_id=851,
- trove_parent_id=418,
- shortname="win7",
- fullname="Windows 7",
- fullpath="Operating System :: Modern (Vendor-Supported) Desktop Operating Systems :: Windows 7")
- M.TroveCategory(trove_cat_id=728,
- trove_parent_id=315,
- shortname="android",
- fullname="Android",
- fullpath="Operating System :: Handheld/Embedded Operating Systems :: Android")
- M.TroveCategory(trove_cat_id=780,
- trove_parent_id=315,
- shortname="ios",
- fullname="Apple iPhone",
- fullpath="Operating System :: Handheld/Embedded Operating Systems :: Apple iPhone")
- M.TroveCategory(trove_cat_id=863,
- trove_parent_id=534,
- shortname="architects",
- fullname="Architects",
- fullpath="Intended Audience :: by End-User Class :: Architects")
- M.TroveCategory(trove_cat_id=864,
- trove_parent_id=534,
- shortname="auditors",
- fullname="Auditors",
- fullpath="Intended Audience :: by End-User Class :: Auditors")
- M.TroveCategory(trove_cat_id=865,
- trove_parent_id=534,
- shortname="testers",
- fullname="Testers",
- fullpath="Intended Audience :: by End-User Class :: Testers")
- M.TroveCategory(trove_cat_id=866,
- trove_parent_id=534,
- shortname="secpros",
- fullname="Security Professionals",
- fullpath="Intended Audience :: by End-User Class :: Security Professionals")
- M.TroveCategory(trove_cat_id=867,
- trove_parent_id=535,
- shortname="secindustry",
- fullname="Security",
- fullpath="Intended Audience :: by Industry or Sector :: Security")
-
def m__sync(self):
self.create_trove_cat(
(639, 14, "cpal", "Common Public Attribution License 1.0 (CPAL)",
@@ -1605,12 +1542,6 @@ class CreateTroveCategoriesCommand(base.Command):
(678, 14, "boostlicense", "Boost Software License (BSL1.0)",
"License :: OSI-Approved Open Source :: Boost Software License (BSL1.0)"))
self.create_trove_cat(
- (679, 14, "gplv3", "GNU General Public License version 3.0 (GPLv3)",
- "License :: OSI-Approved Open Source :: GNU General Public License version 3.0 (GPLv3)"))
- self.create_trove_cat(
- (680, 14, "lgplv3", "GNU Library or ""Lesser"" General Public License version 3.0 (LGPLv3)",
- "License :: OSI-Approved Open Source :: GNU Library or ""Lesser"" General Public License version 3.0 (LGPLv3)"))
- self.create_trove_cat(
(681, 14, "isclicense", "ISC License", "License :: OSI-Approved Open Source :: ISC License"))
self.create_trove_cat((682, 14, "multicslicense", "Multics License",
"License :: OSI-Approved Open Source :: Multics License"))
[3/5] git commit: [#7628] ticket:646 Command to remove duplicate trove categories
Posted by je...@apache.org.
[#7628] ticket:646 Command to remove duplicate trove categories
Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/487de12f
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/487de12f
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/487de12f
Branch: refs/heads/je/42cc_7628
Commit: 487de12fbded799c03b92bfb7e525c0233707418
Parents: da026b8
Author: Igor Bondarenko <je...@gmail.com>
Authored: Wed Sep 3 16:14:43 2014 +0300
Committer: Igor Bondarenko <je...@gmail.com>
Committed: Wed Sep 3 16:14:43 2014 +0300
----------------------------------------------------------------------
.../allura/scripts/remove_duplicate_troves.py | 103 +++++++++++++++++++
1 file changed, 103 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/allura/blob/487de12f/Allura/allura/scripts/remove_duplicate_troves.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/remove_duplicate_troves.py b/Allura/allura/scripts/remove_duplicate_troves.py
new file mode 100644
index 0000000..aab0bf1
--- /dev/null
+++ b/Allura/allura/scripts/remove_duplicate_troves.py
@@ -0,0 +1,103 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import argparse
+import logging
+from itertools import groupby
+from collections import defaultdict
+from operator import itemgetter
+
+from ming.odm import ThreadLocalORMSession
+
+from allura.scripts import ScriptTask
+from allura import model as M
+
+
+log = logging.getLogger(__name__)
+
+
+class RemoveDuplicateTroves(ScriptTask):
+
+ @classmethod
+ def execute(cls, options):
+ duplicates = cls._find_duplicates()
+ log.info('Found %s duplicate categories: %s', len(duplicates), duplicates.keys())
+ for name, dups in duplicates.iteritems():
+ projects_with_category = {}
+ for dup in dups:
+ projects = cls._projects_with_category(dup._id)
+ projects_with_category[dup._id] = projects
+ log.info('Following projects are using category %s:', name)
+ for _id, ps in projects_with_category.iteritems():
+ log.info(' with id %s: %s', _id, [p.shortname for p in ps])
+ priority = [(i, len(ps)) for i, ps in projects_with_category.items()]
+ priority = sorted(priority, key=itemgetter(1), reverse=True)
+ priority = [p[0] for p in priority]
+ live, kill = priority[0], priority[1:]
+ log.info('%s will live %s will die', live, kill)
+ if sum([len(projects_with_category[_id]) for _id in kill]) == 0:
+ # Duplicates are used nowhere
+ log.info('Removing categories %s', kill)
+ if not options.dry_run:
+ M.TroveCategory.query.remove({'_id': {'$in': kill}})
+ else:
+ # Duplicates are used somewhere, need to reasign for all projects that use them
+ pass
+ ThreadLocalORMSession.flush_all()
+
+ @classmethod
+ def _find_duplicates(cls):
+ dups = []
+ for cat in M.TroveCategory.query.find():
+ if M.TroveCategory.query.find({
+ 'shortname': cat.shortname,
+ 'trove_cat_id': cat.trove_cat_id,
+ 'trove_parent_id': cat.trove_parent_id,
+ 'fullname': cat.fullname,
+ 'fullpath': cat.fullpath,
+ }).count() > 1:
+ dups.append(cat)
+ result = defaultdict(list)
+ for k, v in groupby(dups, lambda x: x.shortname):
+ result[k].extend(list(v))
+ return result
+
+ @classmethod
+ def _projects_with_category(cls, _id):
+ p = M.Project.query.find({'$or': [
+ {'trove_root_database': _id},
+ {'trove_developmentstatus': _id},
+ {'trove_audience': _id},
+ {'trove_license': _id},
+ {'trove_os': _id},
+ {'trove_language': _id},
+ {'trove_topic': _id},
+ {'trove_natlanguage': _id},
+ {'trove_environment':_id},
+ ]})
+ return p.all()
+
+ @classmethod
+ def parser(cls):
+ parser = argparse.ArgumentParser(description='Remove duplicate troves')
+ parser.add_argument('--dry-run', action='store_true', dest='dry_run',
+ default=False, help='Print what will be changed but do not change anything')
+ return parser
+
+
+if __name__ == '__main__':
+ RemoveDuplicateTroves.main()