You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by br...@apache.org on 2014/05/21 16:44:51 UTC
[03/20] git commit: [#7257] ticket:562 Introduced base class for
indexable objects
[#7257] ticket:562 Introduced base class for indexable objects
Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/d107c7f6
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/d107c7f6
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/d107c7f6
Branch: refs/heads/master
Commit: d107c7f65da89cf962e0e0a415724bf0ddbdf717
Parents: 79ac15f
Author: Ferens Dmitriy <fe...@gmail.com>
Authored: Tue Mar 25 16:44:24 2014 +0200
Committer: Dave Brondsema <db...@slashdotmedia.com>
Committed: Wed May 21 14:44:19 2014 +0000
----------------------------------------------------------------------
Allura/allura/lib/plugin.py | 7 ++-
Allura/allura/lib/search.py | 71 +++++++++++++++++++++++-------
Allura/allura/model/artifact.py | 27 +-----------
Allura/allura/model/project.py | 11 +++++
Allura/allura/tasks/index_tasks.py | 21 ++++++++-
Allura/allura/tests/test_tasks.py | 2 +-
Allura/allura/tests/unit/test_solr.py | 56 +++++++++++------------
7 files changed, 122 insertions(+), 73 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/allura/blob/d107c7f6/Allura/allura/lib/plugin.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/plugin.py b/Allura/allura/lib/plugin.py
index 2a09d73..dcc8cbf 100644
--- a/Allura/allura/lib/plugin.py
+++ b/Allura/allura/lib/plugin.py
@@ -389,7 +389,6 @@ class LdapAuthenticationProvider(AuthenticationProvider):
class ProjectRegistrationProvider(object):
-
'''
Project registration services for Allura. This is a full implementation
and the default. Extend this class with your own if you need to add more
@@ -527,6 +526,12 @@ class ProjectRegistrationProvider(object):
raise forge_exc.ProjectConflict(
'%s already exists in nbhd %s' % (shortname, neighborhood._id))
+ def index_project(self, project):
+ """
+ Return any additional fields by which the given project should be indexed in SOLR.
+ """
+ return dict()
+
def _create_project(self, neighborhood, shortname, project_name, user, user_project, private_project, apps):
'''
Actually create the project, no validation. This should not be called directly
http://git-wip-us.apache.org/repos/asf/allura/blob/d107c7f6/Allura/allura/lib/search.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/search.py b/Allura/allura/lib/search.py
index cd637ad..42cf25a 100644
--- a/Allura/allura/lib/search.py
+++ b/Allura/allura/lib/search.py
@@ -31,26 +31,63 @@ from pysolr import SolrError
from allura.lib import helpers as h
from allura.lib.solr import escape_solr_arg
from allura.model import ArtifactReference
-from .markdown_extensions import ForgeExtension
log = getLogger(__name__)
-def solarize(obj):
- if obj is None:
- return None
- doc = obj.index()
- if doc is None:
- return None
- # if index() returned doc without text, assume empty text
- if not doc.get('text'):
- doc['text'] = ''
- # Convert text to plain text (It usually contains markdown markup).
- # To do so, we convert markdown into html, and then strip all html tags.
- text = doc['text']
- text = g.markdown.convert(text)
- doc['text'] = jinja2.Markup.escape(text).striptags()
- return doc
+class SearchIndexable(object):
+
+ """
+ Base class for anything you want to search on.
+ """
+
+ def index_id(self):
+ """
+ Should return a globally unique artifact identifier.
+
+ Used for SOLR ID, shortlinks, and possibly elsewhere.
+ """
+ raise NotImplementedError
+
+ def index(self):
+ """
+ Return a :class:`dict` representation of this object suitable for
+ search indexing.
+
+ Subclasses should implement this, providing a dictionary of solr_field => value.
+ These fields & values will be stored by Solr. Subclasses should call the
+ super().index() and then extend it with more fields.
+
+ You probably want to override at least title and text to have
+ meaningful search results and email senders.
+
+ You can take advantage of Solr's dynamic field typing by adding a type
+ suffix to your field names, e.g.:
+
+ _s (string) (not analyzed)
+ _t (text) (analyzed)
+ _b (bool)
+ _i (int)
+ _f (float)
+ _dt (datetime)
+
+ """
+ raise NotImplementedError
+
+ def solarize(self):
+ doc = self.index()
+ if doc is None:
+ return None
+ # if index() returned doc without text, assume empty text
+ text = doc.setdefault('text', '')
+ # Convert text to plain text (It usually contains markdown markup).
+ # To do so, we convert markdown into html, and then strip all html tags.
+ text = g.markdown.convert(text)
+ doc['text'] = jinja2.Markup.escape(text).striptags()
+ return doc
+
+ def add_to_solr(self, solr_instance):
+ solr_instance.add(self.solarize())
class SearchError(SolrError):
@@ -234,6 +271,8 @@ def search_app(q='', fq=None, app=True, **kw):
def find_shortlinks(text):
+ from .markdown_extensions import ForgeExtension
+
md = markdown.Markdown(
extensions=['codehilite', ForgeExtension(), 'tables'],
output_format='html4')
http://git-wip-us.apache.org/repos/asf/allura/blob/d107c7f6/Allura/allura/model/artifact.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/artifact.py b/Allura/allura/model/artifact.py
index 07f4daf..1b0e2ad 100644
--- a/Allura/allura/model/artifact.py
+++ b/Allura/allura/model/artifact.py
@@ -32,6 +32,7 @@ from webhelpers import feedgenerator as FG
from allura.lib import helpers as h
from allura.lib import security
+from allura.lib.search import SearchIndexable
from .session import main_orm_session
from .session import project_orm_session
@@ -46,7 +47,7 @@ from filesystem import File
log = logging.getLogger(__name__)
-class Artifact(MappedClass):
+class Artifact(MappedClass, SearchIndexable):
"""
Base class for anything you want to keep track of.
@@ -296,11 +297,6 @@ class Artifact(MappedClass):
return self.app_config.load()(self.project, self.app_config)
def index_id(self):
- """Return a globally unique artifact identifier.
-
- Used for SOLR ID, shortlinks, and possibly elsewhere.
-
- """
id = '%s.%s#%s' % (
self.__class__.__module__,
self.__class__.__name__,
@@ -308,25 +304,6 @@ class Artifact(MappedClass):
return id.replace('.', '/')
def index(self):
- """Return a :class:`dict` representation of this Artifact suitable for
- search indexing.
-
- Subclasses should override this, providing a dictionary of solr_field => value.
- These fields & values will be stored by Solr. Subclasses should call the
- super() index() and then extend it with more fields.
-
- You probably want to override at least title and text to have
- meaningful search results and email senders.
-
- You can take advantage of Solr's dynamic field typing by adding a type
- suffix to your field names, e.g.:
-
- _s (string) (not analyzed)
- _t (text) (analyzed)
- _b (bool)
- _i (int)
-
- """
project = self.project
return dict(
id=self.index_id(),
http://git-wip-us.apache.org/repos/asf/allura/blob/d107c7f6/Allura/allura/model/project.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/project.py b/Allura/allura/model/project.py
index 587fd11..271fe24 100644
--- a/Allura/allura/model/project.py
+++ b/Allura/allura/model/project.py
@@ -1019,6 +1019,17 @@ class Project(MappedClass, ActivityNode, ActivityObject):
else:
return 'busy'
+ def index_id(self):
+ id = 'unique id'
+ return id
+
+ def index(self):
+ provider = plugin.ProjectRegistrationProvider.get()
+ data = provider.index_project(self)
+ data.update(id=self.index_id(),
+ title='Project %s' % self._id)
+ return data
+
def __json__(self):
result = dict(
shortname=self.shortname,
http://git-wip-us.apache.org/repos/asf/allura/blob/d107c7f6/Allura/allura/tasks/index_tasks.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tasks/index_tasks.py b/Allura/allura/tasks/index_tasks.py
index 7b6b4c6..222a926 100644
--- a/Allura/allura/tasks/index_tasks.py
+++ b/Allura/allura/tasks/index_tasks.py
@@ -25,10 +25,26 @@ from allura.lib.decorators import task
from allura.lib.exceptions import CompoundError
from allura.lib.solr import make_solr_from_config
+
log = logging.getLogger(__name__)
@task
+def add_project(project_id, solr_hosts=None):
+ '''
+ Add project to SOLR.
+
+ :param solr_hosts: a list of solr hosts to use instead of defaults
+ :type solr_hosts: list of strings
+ '''
+ from allura import model as M
+
+ solr = make_solr_from_config(solr_hosts) if solr_hosts else g.solr
+ project = M.Project.query.get(_id=project_id)
+ project.add_to_solr(solr)
+
+
+@task
def add_artifacts(ref_ids, update_solr=True, update_refs=True, solr_hosts=None):
'''
Add the referenced artifacts to SOLR and shortlinks.
@@ -37,7 +53,8 @@ def add_artifacts(ref_ids, update_solr=True, update_refs=True, solr_hosts=None):
:type solr_hosts: [str]
'''
from allura import model as M
- from allura.lib.search import find_shortlinks, solarize
+ from allura.lib.search import find_shortlinks
+
if solr_hosts:
solr = make_solr_from_config(solr_hosts)
else:
@@ -48,7 +65,7 @@ def add_artifacts(ref_ids, update_solr=True, update_refs=True, solr_hosts=None):
for ref in M.ArtifactReference.query.find(dict(_id={'$in': ref_ids})):
try:
artifact = ref.artifact
- s = solarize(artifact)
+ s = artifact.solarize()
if s is None:
continue
if update_solr:
http://git-wip-us.apache.org/repos/asf/allura/blob/d107c7f6/Allura/allura/tests/test_tasks.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/test_tasks.py b/Allura/allura/tests/test_tasks.py
index f06ee06..ee8adf3 100644
--- a/Allura/allura/tests/test_tasks.py
+++ b/Allura/allura/tests/test_tasks.py
@@ -149,7 +149,7 @@ class TestIndexTasks(unittest.TestCase):
sort_key = operator.itemgetter('id')
assert_equal(
sorted(solr.add.call_args[0][0], key=sort_key),
- sorted([search.solarize(ref.artifact) for ref in arefs],
+ sorted([ref.artifact.solarize() for ref in arefs],
key=sort_key))
index_tasks.del_artifacts(ref_ids)
M.main_orm_session.flush()
http://git-wip-us.apache.org/repos/asf/allura/blob/d107c7f6/Allura/allura/tests/unit/test_solr.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/unit/test_solr.py b/Allura/allura/tests/unit/test_solr.py
index 5c7f93d..eb98637 100644
--- a/Allura/allura/tests/unit/test_solr.py
+++ b/Allura/allura/tests/unit/test_solr.py
@@ -25,7 +25,7 @@ from allura.lib import helpers as h
from allura.tests import decorators as td
from alluratest.controller import setup_basic_test
from allura.lib.solr import Solr, escape_solr_arg
-from allura.lib.search import solarize, search_app
+from allura.lib.search import search_app, SearchIndexable
class TestSolr(unittest.TestCase):
@@ -92,34 +92,34 @@ class TestSolr(unittest.TestCase):
solr.query_server.search.assert_called_once_with('bar', kw='kw')
-class TestSolarize(unittest.TestCase):
+class TestSearchIndexable(unittest.TestCase):
- def test_no_object(self):
- assert_equal(solarize(None), None)
-
- def test_empty_index(self):
- obj = mock.MagicMock()
- obj.index.return_value = None
- assert_equal(solarize(obj), None)
-
- def test_doc_without_text(self):
- obj = mock.MagicMock()
- obj.index.return_value = {}
- assert_equal(solarize(obj), {'text': ''})
-
- def test_strip_markdown(self):
- obj = mock.MagicMock()
- obj.index.return_value = {'text': '# Header'}
- assert_equal(solarize(obj), {'text': 'Header'})
-
- def test_html_in_text(self):
- obj = mock.MagicMock()
- obj.index.return_value = {'text': '<script>alert(1)</script>'}
- assert_equal(solarize(obj), {'text': ''})
-
- obj.index.return_value = {'text':
- '<script>alert(1)</script>'}
- assert_equal(solarize(obj), {'text': '<script>alert(1)</script>'})
+ def setUp(self):
+ self.obj = SearchIndexable()
+
+ def test_solarize_empty_index(self):
+ self.obj.index = lambda: None
+ assert_equal(self.obj.solarize(), None)
+
+ def test_solarize_doc_without_text(self):
+ self.obj.index = lambda: dict()
+ assert_equal(self.obj.solarize(), dict(text=''))
+
+ def test_solarize_strips_markdown(self):
+ self.obj.index = lambda: dict(text='# Header')
+ assert_equal(self.obj.solarize(), dict(text='Header'))
+
+ def test_solarize_html_in_text(self):
+ self.obj.index = lambda: dict(text='<script>a(1)</script>')
+ assert_equal(self.obj.solarize(), dict(text=''))
+ self.obj.index = lambda: dict(text='<script>a(1)</script>')
+ assert_equal(self.obj.solarize(), dict(text='<script>a(1)</script>'))
+
+ def test_add_to_solr(self):
+ solr_obj = mock.MagicMock()
+ self.obj.index = lambda: dict(text='test')
+ self.obj.add_to_solr(solr_obj)
+ solr_obj.add.assert_called_once_with(dict(text='test'))
class TestSearch_app(unittest.TestCase):