You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bloodhound.apache.org by an...@apache.org on 2013/02/19 14:54:34 UTC

svn commit: r1447721 - in /incubator/bloodhound/trunk/bloodhound_search/bhsearch: ./ search_resources/ tests/ tests/search_resources/

Author: andrej
Date: Tue Feb 19 13:54:34 2013
New Revision: 1447721

URL: http://svn.apache.org/r1447721
Log:
#389 Strip wiki formatting from the Bloodhound Search results

Added:
    incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/base.py
Modified:
    incubator/bloodhound/trunk/bloodhound_search/bhsearch/api.py
    incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/base.py
    incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/ticket_search.py
    incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/wiki_search.py
    incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/__init__.py
    incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/index_with_whoosh.py
    incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/ticket_search.py
    incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/wiki_search.py

Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/api.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/api.py?rev=1447721&r1=1447720&r2=1447721&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/api.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/api.py Tue Feb 19 13:54:34 2013
@@ -73,6 +73,14 @@ class SortInstruction(object):
         return self.__dict__ == other.__dict__
 
 
+class ISearchWikiSyntaxFormatter(Interface):
+    """Extension point interface for wiki syntax processing.
+    """
+
+    def format(self, wiki_text):
+        """
+        Process wiki syntax and return text representation suitable for search
+        """
 
 class ISearchBackend(Interface):
     """Extension point interface for search backend systems.

Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/base.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/base.py?rev=1447721&r1=1447720&r2=1447721&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/base.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/base.py Tue Feb 19 13:54:34 2013
@@ -19,8 +19,11 @@
 #  under the License.
 
 r"""Base classes for Bloodhound Search plugin."""
-from trac.core import Component
-from trac.config import BoolOption
+import re
+
+from bhsearch.api import ISearchWikiSyntaxFormatter
+from trac.core import Component, implements
+from trac.config import BoolOption, ExtensionOption
 
 class BaseIndexer(Component):
     """
@@ -29,6 +32,12 @@ class BaseIndexer(Component):
     silence_on_error = BoolOption('bhsearch', 'silence_on_error', "True",
         """If true, do not throw an exception during indexing a resource""")
 
+    wiki_formatter = ExtensionOption('bhsearch', 'wiki_syntax_formatter',
+        ISearchWikiSyntaxFormatter, 'SimpleSearchWikiSyntaxFormatter',
+        'Name of the component implementing wiki syntax to text formatter \
+        interface: ISearchWikiSyntaxFormatter.')
+
+
 
 class BaseSearchParticipant(Component):
     default_view = None
@@ -52,4 +61,26 @@ class BaseSearchParticipant(Component):
         return (not req or self.required_permission in req.perm)
 
     def get_participant_type(self):
-        return self.participant_type
\ No newline at end of file
+        return self.participant_type
+
+class SimpleSearchWikiSyntaxFormatter(Component):
+    """
+    This class provide very naive formatting of wiki syntax to text
+    appropriate for indexing and search result presentation.
+    A lot of things can be improved here.
+    """
+    implements(ISearchWikiSyntaxFormatter)
+
+    STRIP_CHARS = re.compile(r'([=#\'\"\*/])')
+    REPLACE_CHARS = re.compile(r'([=#\[\]\{\}|])')
+
+    WHITE_SPACE_RE = re.compile(r'([\s]+)')
+    def format(self, wiki_content):
+        if not wiki_content:
+            return wiki_content
+        intermediate = self.STRIP_CHARS.sub("", wiki_content)
+        intermediate = self.REPLACE_CHARS.sub(" ", intermediate)
+        result = self.WHITE_SPACE_RE.sub(" ", intermediate)
+        return result.strip()
+
+

Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/ticket_search.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/ticket_search.py?rev=1447721&r1=1447720&r2=1447721&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/ticket_search.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/ticket_search.py Tue Feb 19 13:54:34 2013
@@ -24,7 +24,7 @@ from bhsearch.api import ISearchParticip
     IIndexParticipant, IndexFields
 from bhsearch.search_resources.base import BaseIndexer, BaseSearchParticipant
 from genshi.builder import tag
-from trac.ticket.api import ITicketChangeListener
+from trac.ticket.api import ITicketChangeListener, TicketSystem
 from trac.ticket import Ticket
 from trac.ticket.query import Query
 from trac.config import ListOption, Option
@@ -54,6 +54,10 @@ class TicketIndexer(BaseIndexer):
         'reporter': TicketFields.AUTHOR,
     }
 
+    def __init__(self):
+        self.fields = TicketSystem(self.env).get_ticket_fields()
+        self.text_area_fields = set(
+            f['name'] for f in self.fields if f['type'] =='textarea')
 
     #ITicketChangeListener methods
     def ticket_created(self, ticket):
@@ -103,10 +107,14 @@ class TicketIndexer(BaseIndexer):
 
         for field, index_field in self.optional_fields.iteritems():
             if field in ticket.values:
-                doc[index_field] = ticket.values[field]
+                field_content = ticket.values[field]
+                if field in self.text_area_fields:
+                    field_content = self.wiki_formatter.format(field_content)
+                doc[index_field] = field_content
 
         doc[TicketFields.CHANGES] = u'\n\n'.join(
-            [x[4] for x in ticket.get_changelog() if x[2] == u'comment'])
+            [self.wiki_formatter.format(x[4]) for x in ticket.get_changelog()
+             if x[2] == u'comment'])
         return doc
 
     def get_entries_for_index(self):

Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/wiki_search.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/wiki_search.py?rev=1447721&r1=1447720&r2=1447721&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/wiki_search.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/wiki_search.py Tue Feb 19 13:54:34 2013
@@ -99,7 +99,7 @@ class WikiIndexer(BaseIndexer):
             IndexFields.TYPE: WIKI_TYPE,
             IndexFields.TIME: page.time,
             IndexFields.AUTHOR: page.author,
-            IndexFields.CONTENT: page.text,
+            IndexFields.CONTENT: self.wiki_formatter.format(page.text),
         }
         return doc
 

Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/__init__.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/__init__.py?rev=1447721&r1=1447720&r2=1447721&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/__init__.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/__init__.py Tue Feb 19 13:54:34 2013
@@ -21,7 +21,7 @@ import unittest
 from bhsearch.tests import (whoosh_backend, index_with_whoosh, web_ui,
                             api)
 from bhsearch.tests.search_resources import (ticket_search, wiki_search,
-                                             milestone_search)
+                                             milestone_search, base)
 
 def suite():
     test_suite = unittest.TestSuite()
@@ -32,6 +32,7 @@ def suite():
     test_suite.addTest(ticket_search.suite())
     test_suite.addTest(wiki_search.suite())
     test_suite.addTest(milestone_search.suite())
+    test_suite.addTest(base.suite())
     return test_suite
 
 if __name__ == '__main__':

Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/index_with_whoosh.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/index_with_whoosh.py?rev=1447721&r1=1447720&r2=1447721&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/index_with_whoosh.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/index_with_whoosh.py Tue Feb 19 13:54:34 2013
@@ -17,7 +17,6 @@
 #  KIND, either express or implied.  See the License for the
 #  specific language governing permissions and limitations
 #  under the License.
-from contextlib import closing
 
 import unittest
 import shutil
@@ -25,10 +24,7 @@ from bhsearch.api import BloodhoundSearc
 from bhsearch.search_resources.milestone_search import MilestoneIndexer
 from bhsearch.tests.base import BaseBloodhoundSearchTest
 from bhsearch.search_resources.ticket_search import TicketIndexer
-
 from bhsearch.whoosh_backend import WhooshBackend
-from trac.test import MockPerm
-from trac.web import Href
 
 class IndexWhooshTestCase(BaseBloodhoundSearchTest):
     def setUp(self):
@@ -116,59 +112,9 @@ class IndexWhooshTestCase(BaseBloodhound
         self.print_result(results)
         self.assertEqual(2, results.hits)
 
-class FormattingTestCase(BaseBloodhoundSearchTest):
-    def setUp(self):
-        super(FormattingTestCase, self).setUp(
-            ['trac.*', 'bhsearch.*'],
-            create_req=True,
-        )
-
-    def test_can_format_wiki_to_text(self):
-        wiki_content = """= Header #overview
-
-        '''bold''', ''italic'', '''''Wikipedia style'''''
-
-        {{{
-        code
-        }}}
-
-        [[PageOutline]]
-
-        || '''Table''' || sampe ||
-
-          - list
-          - sample
-
-        [[Image(mockup_tickets.png)]] [wiki:SomePage p1] [ticket:1 ticket one]
-        http://www.edgewall.com/,
-        [http://www.edgewall.com Edgewall Software]
-
-        [#point1]
-        """
-#        wiki_content = """
-#'''''one''''', '''''two''''', '''''three''''', '''''four'''''
-#        """
-
-        page = self.create_wiki("Dummy wiki", wiki_content)
-        from trac.mimeview.api import RenderingContext
-        context = RenderingContext(
-            page.resource,
-            href=Href('/'),
-            perm=MockPerm(),
-        )
-        context.req = None # 1.0 FIXME .req shouldn't be required by formatter
-#        result = format_to_oneliner(self.env, context, wiki_content)
-#        from trac.wiki.formatter import format_to_oneliner
-        from trac.web.chrome import  format_to_html
-        result = format_to_html(self.env, context, wiki_content)
-        print result
-#
-
-
 def suite():
     test_suite = unittest.TestSuite()
     test_suite.addTest(unittest.makeSuite(IndexWhooshTestCase, 'test'))
-    test_suite.addTest(unittest.makeSuite(FormattingTestCase, 'test'))
     return test_suite
 
 if __name__ == '__main__':

Added: incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/base.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/base.py?rev=1447721&view=auto
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/base.py (added)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/base.py Tue Feb 19 13:54:34 2013
@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+
+import unittest
+from bhsearch.search_resources.base import SimpleSearchWikiSyntaxFormatter
+from bhsearch.tests.base import BaseBloodhoundSearchTest
+from trac.web import Href
+from trac.test import MockPerm
+from trac.wiki import format_to_html
+
+class SimpleSearchWikiSyntaxFormatterTestCase(BaseBloodhoundSearchTest):
+    def setUp(self):
+        super(SimpleSearchWikiSyntaxFormatterTestCase, self).setUp(
+            ['trac.*', 'bhsearch.*'],
+            create_req=True,
+        )
+        self.text_formatter = SimpleSearchWikiSyntaxFormatter(self.env)
+
+
+    def test_can_format_header(self):
+        wiki_content = """= Header #overview
+        some text"""
+        result = self._call_format(wiki_content)
+        self.assertEqual("Header overview some text", result)
+
+    def test_can_format_code(self):
+        wiki_content = """{{{
+        some code
+        }}}
+        text"""
+        result = self._call_format(wiki_content)
+        self.assertEqual("some code text", result)
+
+    def test_can_format_anchor(self):
+        wiki_content = """sometext1
+        [#point1]
+        sometext2
+        """
+        result = self._call_format(wiki_content)
+        self.assertEqual("sometext1 point1 sometext2", result)
+
+    def test_can_format_wiki_link(self):
+        self.assertEqual("wiki:SomePage p1", self._call_format("[wiki:SomePage p1]"))
+
+    def test_can_format_sample_wiki_link(self):
+        self.assertEqual("WikiPage", self._call_format("WikiPage"))
+
+
+    def test_can_format_makro(self):
+        """
+        Makro links must be formatted as text
+        """
+        self.assertEqual(
+            "TicketQuery(keywords~x, formattable, colid)",
+            self._call_format(
+                "[[TicketQuery(keywords=~x, format=table, col=id)]]"))
+
+    def test_can_format_stared_font_makers(self):
+        self.assertEqual(
+            "bold, italic, WikiCreole style",
+            self._call_format(
+                "**bold**, //italic//, **//WikiCreole style//**"))
+
+
+    @unittest.skip("TODO")
+    def test_can_format_non_wiki_camel_case(self):
+        self.assertEqual("WikiPage", self._call_format("!WikiPage"))
+
+
+    def _call_format(self, wiki_content):
+#        page = self.create_wiki("Dummy wiki", wiki_content)
+#        from trac.mimeview.api import RenderingContext
+#        context = RenderingContext(
+#            page.resource,
+#            href=Href('/'),
+#            perm=MockPerm(),
+#        )
+#        context.req = None # 1.0 FIXME .req shouldn't be required by formatter
+#        result = self.text_formatter.format(context, wiki_content)
+        result = self.text_formatter.format(wiki_content)
+        print "Input text:"
+        print wiki_content
+        print "-------------------------"
+        print "Formatted text:"
+        print result
+        return result
+
+    @unittest.skip("Use for debug purposes only")
+    def test_run_html_formatter(self):
+        wiki_content = "!WikiSyntax"
+        page = self.create_wiki("Dummy wiki", wiki_content)
+        from trac.mimeview.api import RenderingContext
+        context = RenderingContext(
+            page.resource,
+            href=Href('/'),
+            perm=MockPerm(),
+        )
+        context.req = None # 1.0 FIXME .req shouldn't be required by formatter
+        format_to_html(self.env, context, wiki_content)
+
+
+def suite():
+    test_suite = unittest.TestSuite()
+    test_suite.addTest(unittest.makeSuite(SimpleSearchWikiSyntaxFormatterTestCase, 'test'))
+    return test_suite
+
+if __name__ == '__main__':
+    unittest.main()

Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/ticket_search.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/ticket_search.py?rev=1447721&r1=1447720&r2=1447721&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/ticket_search.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/ticket_search.py Tue Feb 19 13:54:34 2013
@@ -18,6 +18,7 @@
 #  specific language governing permissions and limitations
 #  under the License.
 import unittest
+from bhsearch.api import BloodhoundSearchApi
 
 from bhsearch.tests.base import BaseBloodhoundSearchTest
 from bhsearch.search_resources.ticket_search import TicketIndexer
@@ -25,13 +26,15 @@ from bhsearch.search_resources.ticket_se
 class TicketIndexerSilenceOnExceptionTestCase(BaseBloodhoundSearchTest):
     def setUp(self):
         super(TicketIndexerSilenceOnExceptionTestCase, self).setUp()
-        self.env.config.set('bhsearch', 'silence_on_error', "True")
         self.ticket_indexer = TicketIndexer(self.env)
+        self.search_api = BloodhoundSearchApi(self.env)
+        self.env.config.set('bhsearch', 'silence_on_error', "False")
 
     def tearDown(self):
         pass
 
     def test_does_not_raise_exception_by_default(self):
+        self.env.config.set('bhsearch', 'silence_on_error', "True")
         self.ticket_indexer.ticket_created(None)
 
     def test_raise_exception_if_configured(self):
@@ -41,6 +44,15 @@ class TicketIndexerSilenceOnExceptionTes
             self.ticket_indexer.ticket_created,
             None)
 
+    def test_can_strip_wiki_syntax(self):
+        #act
+        self.insert_ticket("T1", description=" = Header")
+        #assert
+        results = self.search_api.query("*:*")
+        self.print_result(results)
+        self.assertEqual(1, results.hits)
+        self.assertEqual("Header", results.docs[0]["content"])
+
 
 def suite():
     return unittest.makeSuite(TicketIndexerSilenceOnExceptionTestCase, 'test')

Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/wiki_search.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/wiki_search.py?rev=1447721&r1=1447720&r2=1447721&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/wiki_search.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/wiki_search.py Tue Feb 19 13:54:34 2013
@@ -128,6 +128,16 @@ class WikiIndexerEventsTestCase(BaseBloo
         self.assertEqual(1, results.hits)
         self.assertEqual("version1", results.docs[0]["content"])
 
+    def test_can_strip_wiki_formatting(self):
+        #arrange
+        self.insert_wiki(self.DUMMY_PAGE_NAME, " = Header")
+        #act
+        results = self.search_api.query("*:*")
+        #assert
+        self.print_result(results)
+        self.assertEqual(1, results.hits)
+        self.assertEqual("Header", results.docs[0]["content"])
+
 def suite():
     test_suite = unittest.TestSuite()
     test_suite.addTest(unittest.makeSuite(



Re: svn commit: r1447721 - in /incubator/bloodhound/trunk/bloodhound_search/bhsearch: ./ search_resources/ tests/ tests/search_resources/

Posted by Andrej Golcov <an...@digiverse.si>.
Hi,

If you have played with Bloodhound Search, please rebuild the search index in
order to get rid of wiki syntax in search results.
You can rebuild the index by running the trac-admin tool:

trac-admin <path_to_trac_environment> bhsearch rebuild

As usual, any feedback is highly appreciated :)
Regards, Andrej


On 19 February 2013 14:54,  <an...@apache.org> wrote:
> Author: andrej
> Date: Tue Feb 19 13:54:34 2013
> New Revision: 1447721
>
> URL: http://svn.apache.org/r1447721
> Log:
> #389 Strip wiki formatting from the Bloodhound Search results
[...]