You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by br...@apache.org on 2015/12/11 20:32:59 UTC
[2/5] allura git commit: [#8033] move scripts/create-allura-sitemap.py to ScriptTask
[#8033] move scripts/create-allura-sitemap.py to ScriptTask
Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/2f81405d
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/2f81405d
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/2f81405d
Branch: refs/heads/db/8033
Commit: 2f81405dd2d722263b27a883f009ee5c747621cc
Parents: 47b6504
Author: Dave Brondsema <da...@brondsema.net>
Authored: Fri Dec 11 12:14:12 2015 -0500
Committer: Dave Brondsema <da...@brondsema.net>
Committed: Fri Dec 11 12:16:49 2015 -0500
----------------------------------------------------------------------
Allura/allura/scripts/create_sitemap_files.py | 174 +++++++++++++++++++
Allura/docs/getting_started/administration.rst | 16 +-
scripts/create-allura-sitemap.py | 175 --------------------
3 files changed, 182 insertions(+), 183 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/allura/blob/2f81405d/Allura/allura/scripts/create_sitemap_files.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/create_sitemap_files.py b/Allura/allura/scripts/create_sitemap_files.py
new file mode 100644
index 0000000..e4d98a9
--- /dev/null
+++ b/Allura/allura/scripts/create_sitemap_files.py
@@ -0,0 +1,174 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Generate Allura sitemap xml files. You will need to configure your webserver to serve the files.
+
+This takes a while to run on a prod-sized data set. There are a couple of
+things that would make it faster, if we need/want to.
+
+1. Monkeypatch forgetracker.model.ticket.Globals.bin_count to skip the
+ refresh (Solr search) and just return zero for everything, since we don't
+ need bin counts for the sitemap.
+
+2. Use multiprocessing to distribute the offsets to n subprocesses.
+"""
+
+import os
+from datetime import datetime
+import argparse
+
+from jinja2 import Template
+import pylons
+import webob
+from pylons import tmpl_context as c
+from ming.orm import ThreadLocalORMSession
+
+from allura import model as M
+from allura.lib import security, utils
+from allura.scripts import ScriptTask
+
+
+MAX_SITEMAP_URLS = 50000
+BASE_URL = 'http://sourceforge.net'
+
+INDEX_TEMPLATE = """\
+<?xml version="1.0" encoding="utf-8"?>
+<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+ {% for sitemap in sitemaps -%}
+ <sitemap>
+ <loc>{{ sitemap }}</loc>
+ <lastmod>{{ now }}</lastmod>
+ </sitemap>
+ {%- endfor %}
+</sitemapindex>
+"""
+
+SITEMAP_TEMPLATE = """\
+<?xml version="1.0" encoding="utf-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+ {% for loc in locs -%}
+ <url>
+ <loc>{{ loc.url }}</loc>
+ <lastmod>{{ loc.date }}</lastmod>
+ <changefreq>daily</changefreq>
+ </url>
+ {% endfor %}
+</urlset>
+"""
+
+
+class CreateSitemapFiles(ScriptTask):
+
+ @classmethod
+ def execute(cls, options):
+ # This script will indirectly call app.sidebar_menu() for every app in
+ # every project. Some of the sidebar_menu methods expect the
+ # pylons.request threadlocal object to be present. So, we're faking it.
+ #
+ # The fact that this isn't a 'real' request doesn't matter for the
+ # purposes of the sitemap.
+ pylons.request._push_object(webob.Request.blank('/'))
+
+ output_path = options.output_dir
+ if os.path.exists(output_path):
+ raise Exception('%s directory already exists.' % output_path)
+ os.mkdir(output_path)
+
+ now = datetime.utcnow().date()
+ sitemap_content_template = Template(SITEMAP_TEMPLATE)
+
+ def write_sitemap(urls, file_no):
+ sitemap_content = sitemap_content_template.render(dict(now=now, locs=urls))
+ with open(os.path.join(output_path, 'sitemap-%d.xml' % file_no), 'w') as f:
+ f.write(sitemap_content)
+
+ creds = security.Credentials.get()
+ locs = []
+ file_count = 0
+
+ nbhd_id = []
+ if options.neighborhood:
+ prefix = ['/%s/' % n for n in options.neighborhood]
+ nbhd_id = [nbhd._id for nbhd in M.Neighborhood.query.find({'url_prefix': {'$in': prefix}})]
+
+ # write sitemap files, MAX_SITEMAP_URLS per file
+ for chunk in utils.chunked_find(M.Project, {'deleted': False, 'neighborhood_id': {'$nin': nbhd_id}}):
+ for p in chunk:
+ c.project = p
+ try:
+ for s in p.sitemap(excluded_tools=['git', 'hg', 'svn']):
+ url = BASE_URL + s.url if s.url[0] == '/' else s.url
+ locs.append({'url': url,
+ 'date': p.last_updated.strftime("%Y-%m-%d")})
+
+ except Exception, e:
+ print "Error creating sitemap for project '%s': %s" %\
+ (p.shortname, e)
+ creds.clear()
+ if len(locs) >= options.urls_per_file:
+ write_sitemap(locs[:options.urls_per_file], file_count)
+ del locs[:options.urls_per_file]
+ file_count += 1
+ M.main_orm_session.clear()
+ ThreadLocalORMSession.close_all()
+ while locs:
+ write_sitemap(locs[:options.urls_per_file], file_count)
+ del locs[:options.urls_per_file]
+ file_count += 1
+ # write sitemap index file
+ if file_count:
+ sitemap_index_vars = dict(
+ now=now,
+ sitemaps=[
+ '%s/allura_sitemap/sitemap-%d.xml' % (BASE_URL, n)
+ for n in range(file_count)])
+ sitemap_index_content = Template(
+ INDEX_TEMPLATE).render(sitemap_index_vars)
+ with open(os.path.join(output_path, 'sitemap.xml'), 'w') as f:
+ f.write(sitemap_index_content)
+
+ @classmethod
+ def parser(cls):
+ class Validate(argparse.Action):
+ def __call__(self, parser, namespace, value, option_string=None):
+ value = min(value, MAX_SITEMAP_URLS)
+ setattr(namespace, self.dest, value)
+
+ parser = argparse.ArgumentParser(description=__doc__,
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+ parser.add_argument('-o', '--output-dir',
+ dest='output_dir',
+ default='/tmp/allura_sitemap',
+ help='Output directory (absolute path).'
+ '[default: %(default)s]')
+ parser.add_argument('-u', '--urls-per-file', dest='urls_per_file',
+ default=10000, type=int,
+ help='Number of URLs per sitemap file. [default: %(default)s, max: ' +
+ str(MAX_SITEMAP_URLS) + ']',
+ action=Validate)
+ parser.add_argument('-n', '--neighborhood', dest='neighborhood',
+ help="URL prefix of excluded neighborhood(s)",
+ default=None, nargs='*')
+ return parser
+
+
+def get_parser():
+ return CreateSitemapFiles.parser()
+
+if __name__ == '__main__':
+ CreateSitemapFiles.main()
http://git-wip-us.apache.org/repos/asf/allura/blob/2f81405d/Allura/docs/getting_started/administration.rst
----------------------------------------------------------------------
diff --git a/Allura/docs/getting_started/administration.rst b/Allura/docs/getting_started/administration.rst
index e5d5a10..d1b4b93 100644
--- a/Allura/docs/getting_started/administration.rst
+++ b/Allura/docs/getting_started/administration.rst
@@ -72,10 +72,10 @@ Commands can be discovered and run via the `paster` command when you are in the
Scripts are in the `scripts/` directory and run slightly differently, via `paster script`. An extra
:kbd:`--` is required to separate script arguments from paster arguments. Example::
- paster script development.ini ../scripts/create-allura-sitemap.py -- --help
+ paster script development.ini ../scripts/add_user_to_group.py -- --help
... help output ...
- paster script development.ini ../scripts/create-allura-sitemap.py -- -u 100
+ paster script development.ini ../scripts/add_user_to_group.py -- --nbhd /u/ johndoe Admin
To run these when using docker, prefix with :code:`docker-compose run taskd` and use :code:`docker-dev.ini` like::
@@ -224,15 +224,15 @@ reindex_users.py
:prog: paster script development.ini allura/scripts/reindex_users.py --
-create-allura-sitemap.py
-------------------------
+create_sitemap_files.py
+-----------------------
-*Cannot currently be run as a background task.*
+*Can be run as a background task using task name:* :code:`allura.scripts.create_sitemap_files.CreateSitemapFiles`
.. argparse::
- :file: ../../scripts/create-allura-sitemap.py
- :func: parser
- :prog: paster script development.ini ../scripts/create-allura-sitemap.py --
+ :module: allura.scripts.create_sitemap_files
+ :func: get_parser
+ :prog: paster script development.ini allura/scripts/create_sitemap_files.py --
publicize-neighborhood.py
http://git-wip-us.apache.org/repos/asf/allura/blob/2f81405d/scripts/create-allura-sitemap.py
----------------------------------------------------------------------
diff --git a/scripts/create-allura-sitemap.py b/scripts/create-allura-sitemap.py
deleted file mode 100644
index 9ab8f38..0000000
--- a/scripts/create-allura-sitemap.py
+++ /dev/null
@@ -1,175 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Generate Allura sitemap xml files. You will need to configure your webserver to serve the files.
-
-This takes a while to run on a prod-sized data set. There are a couple of
-things that would make it faster, if we need/want to.
-
-1. Monkeypatch forgetracker.model.ticket.Globals.bin_count to skip the
- refresh (Solr search) and just return zero for everything, since we don't
- need bin counts for the sitemap.
-
-2. Use multiprocessing to distribute the offsets to n subprocesses.
-"""
-
-import os
-import sys
-from datetime import datetime
-from jinja2 import Template
-
-import pylons
-import webob
-from pylons import tmpl_context as c
-
-from allura import model as M
-from allura.lib import security, utils
-from ming.orm import ThreadLocalORMSession
-
-MAX_SITEMAP_URLS = 50000
-BASE_URL = 'http://sourceforge.net'
-
-INDEX_TEMPLATE = """\
-<?xml version="1.0" encoding="utf-8"?>
-<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
- {% for sitemap in sitemaps -%}
- <sitemap>
- <loc>{{ sitemap }}</loc>
- <lastmod>{{ now }}</lastmod>
- </sitemap>
- {%- endfor %}
-</sitemapindex>
-"""
-
-SITEMAP_TEMPLATE = """\
-<?xml version="1.0" encoding="utf-8"?>
-<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
- {% for loc in locs -%}
- <url>
- <loc>{{ loc.url }}</loc>
- <lastmod>{{ loc.date }}</lastmod>
- <changefreq>daily</changefreq>
- </url>
- {% endfor %}
-</urlset>
-"""
-
-
-def main(options):
- # This script will indirectly call app.sidebar_menu() for every app in
- # every project. Some of the sidebar_menu methods expect the
- # pylons.request threadlocal object to be present. So, we're faking it.
- #
- # The fact that this isn't a 'real' request doesn't matter for the
- # purposes of the sitemap.
- pylons.request._push_object(webob.Request.blank('/'))
-
- output_path = options.output_dir
- if os.path.exists(output_path):
- sys.exit('Error: %s directory already exists.' % output_path)
- try:
- os.mkdir(output_path)
- except OSError, e:
- sys.exit("Error: Couldn't create %s:\n%s" % (output_path, e))
-
- now = datetime.utcnow().date()
- sitemap_content_template = Template(SITEMAP_TEMPLATE)
-
- def write_sitemap(urls, file_no):
- sitemap_content = sitemap_content_template.render(dict(
- now=now, locs=urls))
- with open(os.path.join(output_path, 'sitemap-%d.xml' % file_no), 'w') as f:
- f.write(sitemap_content)
-
- creds = security.Credentials.get()
- locs = []
- file_count = 0
-
- nbhd_id = []
- if options.neighborhood:
- prefix = ['/%s/' % n for n in options.neighborhood]
- nbhd_id = [nbhd._id for nbhd in M.Neighborhood.query.find({'url_prefix': {'$in': prefix}})]
-
- # write sitemap files, MAX_SITEMAP_URLS per file
- for chunk in utils.chunked_find(M.Project, {'deleted': False, 'neighborhood_id': {'$nin': nbhd_id}}):
- for p in chunk:
- c.project = p
- try:
- for s in p.sitemap(excluded_tools=['git', 'hg', 'svn']):
- url = BASE_URL + s.url if s.url[0] == '/' else s.url
- locs.append({'url': url,
- 'date': p.last_updated.strftime("%Y-%m-%d")})
-
- except Exception, e:
- print "Error creating sitemap for project '%s': %s" %\
- (p.shortname, e)
- creds.clear()
- if len(locs) >= options.urls_per_file:
- write_sitemap(locs[:options.urls_per_file], file_count)
- del locs[:options.urls_per_file]
- file_count += 1
- M.main_orm_session.clear()
- ThreadLocalORMSession.close_all()
- while locs:
- write_sitemap(locs[:options.urls_per_file], file_count)
- del locs[:options.urls_per_file]
- file_count += 1
- # write sitemap index file
- if file_count:
- sitemap_index_vars = dict(
- now=now,
- sitemaps=[
- '%s/allura_sitemap/sitemap-%d.xml' % (BASE_URL, n)
- for n in range(file_count)])
- sitemap_index_content = Template(
- INDEX_TEMPLATE).render(sitemap_index_vars)
- with open(os.path.join(output_path, 'sitemap.xml'), 'w') as f:
- f.write(sitemap_index_content)
-
-
-def parser():
- import argparse
- class Validate(argparse.Action):
- def __call__(self, parser, namespace, value, option_string=None):
- value = min(value, MAX_SITEMAP_URLS)
- setattr(namespace, self.dest, value)
-
- parser = argparse.ArgumentParser(description=__doc__,
- formatter_class=argparse.RawDescriptionHelpFormatter)
- parser.add_argument('-o', '--output-dir',
- dest='output_dir',
- default='/tmp/allura_sitemap',
- help='Output directory (absolute path).'
- '[default: %(default)s]')
- parser.add_argument('-u', '--urls-per-file', dest='urls_per_file',
- default=10000, type=int,
- help='Number of URLs per sitemap file. '
- '[default: %(default)s, max: ' +
- str(MAX_SITEMAP_URLS) + ']',
- action=Validate)
- parser.add_argument('-n', '--neighborhood', dest='neighborhood',
- help="URL prefix of excluded neighborhood(s)",
- default=None, nargs='*')
- return parser
-
-def parse_options():
- return parser().parse_args()
-
-
-if __name__ == '__main__':
- sys.exit(main(parse_options()))