You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by br...@apache.org on 2014/05/21 16:44:51 UTC

[03/20] git commit: [#7257] ticket:562 Introduced base class for indexable objects

[#7257] ticket:562 Introduced base class for indexable objects


Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/d107c7f6
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/d107c7f6
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/d107c7f6

Branch: refs/heads/master
Commit: d107c7f65da89cf962e0e0a415724bf0ddbdf717
Parents: 79ac15f
Author: Ferens Dmitriy <fe...@gmail.com>
Authored: Tue Mar 25 16:44:24 2014 +0200
Committer: Dave Brondsema <db...@slashdotmedia.com>
Committed: Wed May 21 14:44:19 2014 +0000

----------------------------------------------------------------------
 Allura/allura/lib/plugin.py           |  7 ++-
 Allura/allura/lib/search.py           | 71 +++++++++++++++++++++++-------
 Allura/allura/model/artifact.py       | 27 +-----------
 Allura/allura/model/project.py        | 11 +++++
 Allura/allura/tasks/index_tasks.py    | 21 ++++++++-
 Allura/allura/tests/test_tasks.py     |  2 +-
 Allura/allura/tests/unit/test_solr.py | 56 +++++++++++------------
 7 files changed, 122 insertions(+), 73 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/allura/blob/d107c7f6/Allura/allura/lib/plugin.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/plugin.py b/Allura/allura/lib/plugin.py
index 2a09d73..dcc8cbf 100644
--- a/Allura/allura/lib/plugin.py
+++ b/Allura/allura/lib/plugin.py
@@ -389,7 +389,6 @@ class LdapAuthenticationProvider(AuthenticationProvider):
 
 
 class ProjectRegistrationProvider(object):
-
     '''
     Project registration services for Allura.  This is a full implementation
     and the default.  Extend this class with your own if you need to add more
@@ -527,6 +526,12 @@ class ProjectRegistrationProvider(object):
             raise forge_exc.ProjectConflict(
                 '%s already exists in nbhd %s' % (shortname, neighborhood._id))
 
+    def index_project(self, project):
+        """
+        Return any additional fields by which the given project should be indexed in SOLR.
+        """
+        return dict()
+
     def _create_project(self, neighborhood, shortname, project_name, user, user_project, private_project, apps):
         '''
         Actually create the project, no validation.  This should not be called directly

http://git-wip-us.apache.org/repos/asf/allura/blob/d107c7f6/Allura/allura/lib/search.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/search.py b/Allura/allura/lib/search.py
index cd637ad..42cf25a 100644
--- a/Allura/allura/lib/search.py
+++ b/Allura/allura/lib/search.py
@@ -31,26 +31,63 @@ from pysolr import SolrError
 from allura.lib import helpers as h
 from allura.lib.solr import escape_solr_arg
 from allura.model import ArtifactReference
-from .markdown_extensions import ForgeExtension
 
 log = getLogger(__name__)
 
 
-def solarize(obj):
-    if obj is None:
-        return None
-    doc = obj.index()
-    if doc is None:
-        return None
-    # if index() returned doc without text, assume empty text
-    if not doc.get('text'):
-        doc['text'] = ''
-    # Convert text to plain text (It usually contains markdown markup).
-    # To do so, we convert markdown into html, and then strip all html tags.
-    text = doc['text']
-    text = g.markdown.convert(text)
-    doc['text'] = jinja2.Markup.escape(text).striptags()
-    return doc
+class SearchIndexable(object):
+
+    """
+    Base class for anything you want to search on.
+    """
+
+    def index_id(self):
+        """
+        Should return a globally unique artifact identifier.
+
+        Used for SOLR ID, shortlinks, and possibly elsewhere.
+        """
+        raise NotImplementedError
+
+    def index(self):
+        """
+        Return a :class:`dict` representation of this object suitable for
+        search indexing.
+
+        Subclasses should implement this, providing a dictionary of solr_field => value.
+        These fields & values will be stored by Solr.  Subclasses should call the
+        super().index() and then extend it with more fields.
+
+        You probably want to override at least title and text to have
+        meaningful search results and email senders.
+
+        You can take advantage of Solr's dynamic field typing by adding a type
+        suffix to your field names, e.g.:
+
+            _s (string) (not analyzed)
+            _t (text) (analyzed)
+            _b (bool)
+            _i (int)
+            _f (float)
+            _dt (datetime)
+
+        """
+        raise NotImplementedError
+
+    def solarize(self):
+        doc = self.index()
+        if doc is None:
+            return None
+        # if index() returned doc without text, assume empty text
+        text = doc.setdefault('text', '')
+        # Convert text to plain text (It usually contains markdown markup).
+        # To do so, we convert markdown into html, and then strip all html tags.
+        text = g.markdown.convert(text)
+        doc['text'] = jinja2.Markup.escape(text).striptags()
+        return doc
+
+    def add_to_solr(self, solr_instance):
+        solr_instance.add(self.solarize())
 
 
 class SearchError(SolrError):
@@ -234,6 +271,8 @@ def search_app(q='', fq=None, app=True, **kw):
 
 
 def find_shortlinks(text):
+    from .markdown_extensions import ForgeExtension
+
     md = markdown.Markdown(
         extensions=['codehilite', ForgeExtension(), 'tables'],
         output_format='html4')

http://git-wip-us.apache.org/repos/asf/allura/blob/d107c7f6/Allura/allura/model/artifact.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/artifact.py b/Allura/allura/model/artifact.py
index 07f4daf..1b0e2ad 100644
--- a/Allura/allura/model/artifact.py
+++ b/Allura/allura/model/artifact.py
@@ -32,6 +32,7 @@ from webhelpers import feedgenerator as FG
 from allura.lib import helpers as h
 from allura.lib import security
 
+from allura.lib.search import SearchIndexable
 from .session import main_orm_session
 from .session import project_orm_session
 
@@ -46,7 +47,7 @@ from filesystem import File
 log = logging.getLogger(__name__)
 
 
-class Artifact(MappedClass):
+class Artifact(MappedClass, SearchIndexable):
 
     """
     Base class for anything you want to keep track of.
@@ -296,11 +297,6 @@ class Artifact(MappedClass):
             return self.app_config.load()(self.project, self.app_config)
 
     def index_id(self):
-        """Return a globally unique artifact identifier.
-
-        Used for SOLR ID, shortlinks, and possibly elsewhere.
-
-        """
         id = '%s.%s#%s' % (
             self.__class__.__module__,
             self.__class__.__name__,
@@ -308,25 +304,6 @@ class Artifact(MappedClass):
         return id.replace('.', '/')
 
     def index(self):
-        """Return a :class:`dict` representation of this Artifact suitable for
-        search indexing.
-
-        Subclasses should override this, providing a dictionary of solr_field => value.
-        These fields & values will be stored by Solr.  Subclasses should call the
-        super() index() and then extend it with more fields.
-
-        You probably want to override at least title and text to have
-        meaningful search results and email senders.
-
-        You can take advantage of Solr's dynamic field typing by adding a type
-        suffix to your field names, e.g.:
-
-            _s (string) (not analyzed)
-            _t (text) (analyzed)
-            _b (bool)
-            _i (int)
-
-        """
         project = self.project
         return dict(
             id=self.index_id(),

http://git-wip-us.apache.org/repos/asf/allura/blob/d107c7f6/Allura/allura/model/project.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/project.py b/Allura/allura/model/project.py
index 587fd11..271fe24 100644
--- a/Allura/allura/model/project.py
+++ b/Allura/allura/model/project.py
@@ -1019,6 +1019,17 @@ class Project(MappedClass, ActivityNode, ActivityObject):
         else:
             return 'busy'
 
+    def index_id(self):
+        id = 'unique id'
+        return id
+
+    def index(self):
+        provider = plugin.ProjectRegistrationProvider.get()
+        data = provider.index_project(self)
+        data.update(id=self.index_id(),
+                    title='Project %s' % self._id)
+        return data
+
     def __json__(self):
         result = dict(
             shortname=self.shortname,

http://git-wip-us.apache.org/repos/asf/allura/blob/d107c7f6/Allura/allura/tasks/index_tasks.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tasks/index_tasks.py b/Allura/allura/tasks/index_tasks.py
index 7b6b4c6..222a926 100644
--- a/Allura/allura/tasks/index_tasks.py
+++ b/Allura/allura/tasks/index_tasks.py
@@ -25,10 +25,26 @@ from allura.lib.decorators import task
 from allura.lib.exceptions import CompoundError
 from allura.lib.solr import make_solr_from_config
 
+
 log = logging.getLogger(__name__)
 
 
 @task
+def add_project(project_id, solr_hosts=None):
+    '''
+    Add project to SOLR.
+
+    :param solr_hosts: a list of solr hosts to use instead of defaults
+    :type solr_hosts: list of strings
+    '''
+    from allura import model as M
+
+    solr = make_solr_from_config(solr_hosts) if solr_hosts else g.solr
+    project = M.Project.query.get(_id=project_id)
+    project.add_to_solr(solr)
+
+
+@task
 def add_artifacts(ref_ids, update_solr=True, update_refs=True, solr_hosts=None):
     '''
     Add the referenced artifacts to SOLR and shortlinks.
@@ -37,7 +53,8 @@ def add_artifacts(ref_ids, update_solr=True, update_refs=True, solr_hosts=None):
     :type solr_hosts: [str]
     '''
     from allura import model as M
-    from allura.lib.search import find_shortlinks, solarize
+    from allura.lib.search import find_shortlinks
+
     if solr_hosts:
         solr = make_solr_from_config(solr_hosts)
     else:
@@ -48,7 +65,7 @@ def add_artifacts(ref_ids, update_solr=True, update_refs=True, solr_hosts=None):
         for ref in M.ArtifactReference.query.find(dict(_id={'$in': ref_ids})):
             try:
                 artifact = ref.artifact
-                s = solarize(artifact)
+                s = artifact.solarize()
                 if s is None:
                     continue
                 if update_solr:

http://git-wip-us.apache.org/repos/asf/allura/blob/d107c7f6/Allura/allura/tests/test_tasks.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/test_tasks.py b/Allura/allura/tests/test_tasks.py
index f06ee06..ee8adf3 100644
--- a/Allura/allura/tests/test_tasks.py
+++ b/Allura/allura/tests/test_tasks.py
@@ -149,7 +149,7 @@ class TestIndexTasks(unittest.TestCase):
         sort_key = operator.itemgetter('id')
         assert_equal(
             sorted(solr.add.call_args[0][0], key=sort_key),
-            sorted([search.solarize(ref.artifact) for ref in arefs],
+            sorted([ref.artifact.solarize() for ref in arefs],
                    key=sort_key))
         index_tasks.del_artifacts(ref_ids)
         M.main_orm_session.flush()

http://git-wip-us.apache.org/repos/asf/allura/blob/d107c7f6/Allura/allura/tests/unit/test_solr.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/unit/test_solr.py b/Allura/allura/tests/unit/test_solr.py
index 5c7f93d..eb98637 100644
--- a/Allura/allura/tests/unit/test_solr.py
+++ b/Allura/allura/tests/unit/test_solr.py
@@ -25,7 +25,7 @@ from allura.lib import helpers as h
 from allura.tests import decorators as td
 from alluratest.controller import setup_basic_test
 from allura.lib.solr import Solr, escape_solr_arg
-from allura.lib.search import solarize, search_app
+from allura.lib.search import search_app, SearchIndexable
 
 
 class TestSolr(unittest.TestCase):
@@ -92,34 +92,34 @@ class TestSolr(unittest.TestCase):
         solr.query_server.search.assert_called_once_with('bar', kw='kw')
 
 
-class TestSolarize(unittest.TestCase):
+class TestSearchIndexable(unittest.TestCase):
 
-    def test_no_object(self):
-        assert_equal(solarize(None), None)
-
-    def test_empty_index(self):
-        obj = mock.MagicMock()
-        obj.index.return_value = None
-        assert_equal(solarize(obj), None)
-
-    def test_doc_without_text(self):
-        obj = mock.MagicMock()
-        obj.index.return_value = {}
-        assert_equal(solarize(obj), {'text': ''})
-
-    def test_strip_markdown(self):
-        obj = mock.MagicMock()
-        obj.index.return_value = {'text': '# Header'}
-        assert_equal(solarize(obj), {'text': 'Header'})
-
-    def test_html_in_text(self):
-        obj = mock.MagicMock()
-        obj.index.return_value = {'text': '<script>alert(1)</script>'}
-        assert_equal(solarize(obj), {'text': ''})
-
-        obj.index.return_value = {'text':
-                                  '&lt;script&gt;alert(1)&lt;/script&gt;'}
-        assert_equal(solarize(obj), {'text': '<script>alert(1)</script>'})
+    def setUp(self):
+        self.obj = SearchIndexable()
+
+    def test_solarize_empty_index(self):
+        self.obj.index = lambda: None
+        assert_equal(self.obj.solarize(), None)
+
+    def test_solarize_doc_without_text(self):
+        self.obj.index = lambda: dict()
+        assert_equal(self.obj.solarize(), dict(text=''))
+
+    def test_solarize_strips_markdown(self):
+        self.obj.index = lambda: dict(text='# Header')
+        assert_equal(self.obj.solarize(), dict(text='Header'))
+
+    def test_solarize_html_in_text(self):
+        self.obj.index = lambda: dict(text='<script>a(1)</script>')
+        assert_equal(self.obj.solarize(), dict(text=''))
+        self.obj.index = lambda: dict(text='&lt;script&gt;a(1)&lt;/script&gt;')
+        assert_equal(self.obj.solarize(), dict(text='<script>a(1)</script>'))
+
+    def test_add_to_solr(self):
+        solr_obj = mock.MagicMock()
+        self.obj.index = lambda: dict(text='test')
+        self.obj.add_to_solr(solr_obj)
+        solr_obj.add.assert_called_once_with(dict(text='test'))
 
 
 class TestSearch_app(unittest.TestCase):