You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bloodhound.apache.org by an...@apache.org on 2013/02/19 14:54:34 UTC
svn commit: r1447721 - in
/incubator/bloodhound/trunk/bloodhound_search/bhsearch: ./
search_resources/ tests/ tests/search_resources/
Author: andrej
Date: Tue Feb 19 13:54:34 2013
New Revision: 1447721
URL: http://svn.apache.org/r1447721
Log:
#389 Strip wiki formatting from the Bloodhound Search results
Added:
incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/base.py
Modified:
incubator/bloodhound/trunk/bloodhound_search/bhsearch/api.py
incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/base.py
incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/ticket_search.py
incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/wiki_search.py
incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/__init__.py
incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/index_with_whoosh.py
incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/ticket_search.py
incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/wiki_search.py
Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/api.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/api.py?rev=1447721&r1=1447720&r2=1447721&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/api.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/api.py Tue Feb 19 13:54:34 2013
@@ -73,6 +73,14 @@ class SortInstruction(object):
return self.__dict__ == other.__dict__
+class ISearchWikiSyntaxFormatter(Interface):
+ """Extension point interface for wiki syntax processing.
+ """
+
+ def format(self, wiki_text):
+ """
+ Process wiki syntax and return text representation suitable for search
+ """
class ISearchBackend(Interface):
"""Extension point interface for search backend systems.
Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/base.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/base.py?rev=1447721&r1=1447720&r2=1447721&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/base.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/base.py Tue Feb 19 13:54:34 2013
@@ -19,8 +19,11 @@
# under the License.
r"""Base classes for Bloodhound Search plugin."""
-from trac.core import Component
-from trac.config import BoolOption
+import re
+
+from bhsearch.api import ISearchWikiSyntaxFormatter
+from trac.core import Component, implements
+from trac.config import BoolOption, ExtensionOption
class BaseIndexer(Component):
"""
@@ -29,6 +32,12 @@ class BaseIndexer(Component):
silence_on_error = BoolOption('bhsearch', 'silence_on_error', "True",
"""If true, do not throw an exception during indexing a resource""")
+ wiki_formatter = ExtensionOption('bhsearch', 'wiki_syntax_formatter',
+ ISearchWikiSyntaxFormatter, 'SimpleSearchWikiSyntaxFormatter',
+ 'Name of the component implementing wiki syntax to text formatter \
+ interface: ISearchWikiSyntaxFormatter.')
+
+
class BaseSearchParticipant(Component):
default_view = None
@@ -52,4 +61,26 @@ class BaseSearchParticipant(Component):
return (not req or self.required_permission in req.perm)
def get_participant_type(self):
- return self.participant_type
\ No newline at end of file
+ return self.participant_type
+
+class SimpleSearchWikiSyntaxFormatter(Component):
+ """
+ This class provides very naive formatting of wiki syntax to text
+ appropriate for indexing and search result presentation.
+ A lot of things can be improved here.
+ """
+ implements(ISearchWikiSyntaxFormatter)
+
+ STRIP_CHARS = re.compile(r'([=#\'\"\*/])')
+ REPLACE_CHARS = re.compile(r'([=#\[\]\{\}|])')
+
+ WHITE_SPACE_RE = re.compile(r'([\s]+)')
+ def format(self, wiki_content):
+ if not wiki_content:
+ return wiki_content
+ intermediate = self.STRIP_CHARS.sub("", wiki_content)
+ intermediate = self.REPLACE_CHARS.sub(" ", intermediate)
+ result = self.WHITE_SPACE_RE.sub(" ", intermediate)
+ return result.strip()
+
+
Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/ticket_search.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/ticket_search.py?rev=1447721&r1=1447720&r2=1447721&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/ticket_search.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/ticket_search.py Tue Feb 19 13:54:34 2013
@@ -24,7 +24,7 @@ from bhsearch.api import ISearchParticip
IIndexParticipant, IndexFields
from bhsearch.search_resources.base import BaseIndexer, BaseSearchParticipant
from genshi.builder import tag
-from trac.ticket.api import ITicketChangeListener
+from trac.ticket.api import ITicketChangeListener, TicketSystem
from trac.ticket import Ticket
from trac.ticket.query import Query
from trac.config import ListOption, Option
@@ -54,6 +54,10 @@ class TicketIndexer(BaseIndexer):
'reporter': TicketFields.AUTHOR,
}
+ def __init__(self):
+ self.fields = TicketSystem(self.env).get_ticket_fields()
+ self.text_area_fields = set(
+ f['name'] for f in self.fields if f['type'] =='textarea')
#ITicketChangeListener methods
def ticket_created(self, ticket):
@@ -103,10 +107,14 @@ class TicketIndexer(BaseIndexer):
for field, index_field in self.optional_fields.iteritems():
if field in ticket.values:
- doc[index_field] = ticket.values[field]
+ field_content = ticket.values[field]
+ if field in self.text_area_fields:
+ field_content = self.wiki_formatter.format(field_content)
+ doc[index_field] = field_content
doc[TicketFields.CHANGES] = u'\n\n'.join(
- [x[4] for x in ticket.get_changelog() if x[2] == u'comment'])
+ [self.wiki_formatter.format(x[4]) for x in ticket.get_changelog()
+ if x[2] == u'comment'])
return doc
def get_entries_for_index(self):
Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/wiki_search.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/wiki_search.py?rev=1447721&r1=1447720&r2=1447721&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/wiki_search.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/wiki_search.py Tue Feb 19 13:54:34 2013
@@ -99,7 +99,7 @@ class WikiIndexer(BaseIndexer):
IndexFields.TYPE: WIKI_TYPE,
IndexFields.TIME: page.time,
IndexFields.AUTHOR: page.author,
- IndexFields.CONTENT: page.text,
+ IndexFields.CONTENT: self.wiki_formatter.format(page.text),
}
return doc
Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/__init__.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/__init__.py?rev=1447721&r1=1447720&r2=1447721&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/__init__.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/__init__.py Tue Feb 19 13:54:34 2013
@@ -21,7 +21,7 @@ import unittest
from bhsearch.tests import (whoosh_backend, index_with_whoosh, web_ui,
api)
from bhsearch.tests.search_resources import (ticket_search, wiki_search,
- milestone_search)
+ milestone_search, base)
def suite():
test_suite = unittest.TestSuite()
@@ -32,6 +32,7 @@ def suite():
test_suite.addTest(ticket_search.suite())
test_suite.addTest(wiki_search.suite())
test_suite.addTest(milestone_search.suite())
+ test_suite.addTest(base.suite())
return test_suite
if __name__ == '__main__':
Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/index_with_whoosh.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/index_with_whoosh.py?rev=1447721&r1=1447720&r2=1447721&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/index_with_whoosh.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/index_with_whoosh.py Tue Feb 19 13:54:34 2013
@@ -17,7 +17,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-from contextlib import closing
import unittest
import shutil
@@ -25,10 +24,7 @@ from bhsearch.api import BloodhoundSearc
from bhsearch.search_resources.milestone_search import MilestoneIndexer
from bhsearch.tests.base import BaseBloodhoundSearchTest
from bhsearch.search_resources.ticket_search import TicketIndexer
-
from bhsearch.whoosh_backend import WhooshBackend
-from trac.test import MockPerm
-from trac.web import Href
class IndexWhooshTestCase(BaseBloodhoundSearchTest):
def setUp(self):
@@ -116,59 +112,9 @@ class IndexWhooshTestCase(BaseBloodhound
self.print_result(results)
self.assertEqual(2, results.hits)
-class FormattingTestCase(BaseBloodhoundSearchTest):
- def setUp(self):
- super(FormattingTestCase, self).setUp(
- ['trac.*', 'bhsearch.*'],
- create_req=True,
- )
-
- def test_can_format_wiki_to_text(self):
- wiki_content = """= Header #overview
-
- '''bold''', ''italic'', '''''Wikipedia style'''''
-
- {{{
- code
- }}}
-
- [[PageOutline]]
-
- || '''Table''' || sampe ||
-
- - list
- - sample
-
- [[Image(mockup_tickets.png)]] [wiki:SomePage p1] [ticket:1 ticket one]
- http://www.edgewall.com/,
- [http://www.edgewall.com Edgewall Software]
-
- [#point1]
- """
-# wiki_content = """
-#'''''one''''', '''''two''''', '''''three''''', '''''four'''''
-# """
-
- page = self.create_wiki("Dummy wiki", wiki_content)
- from trac.mimeview.api import RenderingContext
- context = RenderingContext(
- page.resource,
- href=Href('/'),
- perm=MockPerm(),
- )
- context.req = None # 1.0 FIXME .req shouldn't be required by formatter
-# result = format_to_oneliner(self.env, context, wiki_content)
-# from trac.wiki.formatter import format_to_oneliner
- from trac.web.chrome import format_to_html
- result = format_to_html(self.env, context, wiki_content)
- print result
-#
-
-
def suite():
test_suite = unittest.TestSuite()
test_suite.addTest(unittest.makeSuite(IndexWhooshTestCase, 'test'))
- test_suite.addTest(unittest.makeSuite(FormattingTestCase, 'test'))
return test_suite
if __name__ == '__main__':
Added: incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/base.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/base.py?rev=1447721&view=auto
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/base.py (added)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/base.py Tue Feb 19 13:54:34 2013
@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import unittest
+from bhsearch.search_resources.base import SimpleSearchWikiSyntaxFormatter
+from bhsearch.tests.base import BaseBloodhoundSearchTest
+from trac.web import Href
+from trac.test import MockPerm
+from trac.wiki import format_to_html
+
+class SimpleSearchWikiSyntaxFormatterTestCase(BaseBloodhoundSearchTest):
+ def setUp(self):
+ super(SimpleSearchWikiSyntaxFormatterTestCase, self).setUp(
+ ['trac.*', 'bhsearch.*'],
+ create_req=True,
+ )
+ self.text_formatter = SimpleSearchWikiSyntaxFormatter(self.env)
+
+
+ def test_can_format_header(self):
+ wiki_content = """= Header #overview
+ some text"""
+ result = self._call_format(wiki_content)
+ self.assertEqual("Header overview some text", result)
+
+ def test_can_format_code(self):
+ wiki_content = """{{{
+ some code
+ }}}
+ text"""
+ result = self._call_format(wiki_content)
+ self.assertEqual("some code text", result)
+
+ def test_can_format_anchor(self):
+ wiki_content = """sometext1
+ [#point1]
+ sometext2
+ """
+ result = self._call_format(wiki_content)
+ self.assertEqual("sometext1 point1 sometext2", result)
+
+ def test_can_format_wiki_link(self):
+ self.assertEqual("wiki:SomePage p1", self._call_format("[wiki:SomePage p1]"))
+
+ def test_can_format_sample_wiki_link(self):
+ self.assertEqual("WikiPage", self._call_format("WikiPage"))
+
+
+ def test_can_format_makro(self):
+ """
+ Macro links must be formatted as text
+ """
+ self.assertEqual(
+ "TicketQuery(keywords~x, formattable, colid)",
+ self._call_format(
+ "[[TicketQuery(keywords=~x, format=table, col=id)]]"))
+
+ def test_can_format_stared_font_makers(self):
+ self.assertEqual(
+ "bold, italic, WikiCreole style",
+ self._call_format(
+ "**bold**, //italic//, **//WikiCreole style//**"))
+
+
+ @unittest.skip("TODO")
+ def test_can_format_non_wiki_camel_case(self):
+ self.assertEqual("WikiPage", self._call_format("!WikiPage"))
+
+
+ def _call_format(self, wiki_content):
+# page = self.create_wiki("Dummy wiki", wiki_content)
+# from trac.mimeview.api import RenderingContext
+# context = RenderingContext(
+# page.resource,
+# href=Href('/'),
+# perm=MockPerm(),
+# )
+# context.req = None # 1.0 FIXME .req shouldn't be required by formatter
+# result = self.text_formatter.format(context, wiki_content)
+ result = self.text_formatter.format(wiki_content)
+ print "Input text:"
+ print wiki_content
+ print "-------------------------"
+ print "Formatted text:"
+ print result
+ return result
+
+ @unittest.skip("Use for debug purposes only")
+ def test_run_html_formatter(self):
+ wiki_content = "!WikiSyntax"
+ page = self.create_wiki("Dummy wiki", wiki_content)
+ from trac.mimeview.api import RenderingContext
+ context = RenderingContext(
+ page.resource,
+ href=Href('/'),
+ perm=MockPerm(),
+ )
+ context.req = None # 1.0 FIXME .req shouldn't be required by formatter
+ format_to_html(self.env, context, wiki_content)
+
+
+def suite():
+ test_suite = unittest.TestSuite()
+ test_suite.addTest(unittest.makeSuite(SimpleSearchWikiSyntaxFormatterTestCase, 'test'))
+ return test_suite
+
+if __name__ == '__main__':
+ unittest.main()
Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/ticket_search.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/ticket_search.py?rev=1447721&r1=1447720&r2=1447721&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/ticket_search.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/ticket_search.py Tue Feb 19 13:54:34 2013
@@ -18,6 +18,7 @@
# specific language governing permissions and limitations
# under the License.
import unittest
+from bhsearch.api import BloodhoundSearchApi
from bhsearch.tests.base import BaseBloodhoundSearchTest
from bhsearch.search_resources.ticket_search import TicketIndexer
@@ -25,13 +26,15 @@ from bhsearch.search_resources.ticket_se
class TicketIndexerSilenceOnExceptionTestCase(BaseBloodhoundSearchTest):
def setUp(self):
super(TicketIndexerSilenceOnExceptionTestCase, self).setUp()
- self.env.config.set('bhsearch', 'silence_on_error', "True")
self.ticket_indexer = TicketIndexer(self.env)
+ self.search_api = BloodhoundSearchApi(self.env)
+ self.env.config.set('bhsearch', 'silence_on_error', "False")
def tearDown(self):
pass
def test_does_not_raise_exception_by_default(self):
+ self.env.config.set('bhsearch', 'silence_on_error', "True")
self.ticket_indexer.ticket_created(None)
def test_raise_exception_if_configured(self):
@@ -41,6 +44,15 @@ class TicketIndexerSilenceOnExceptionTes
self.ticket_indexer.ticket_created,
None)
+ def test_can_strip_wiki_syntax(self):
+ #act
+ self.insert_ticket("T1", description=" = Header")
+ #assert
+ results = self.search_api.query("*:*")
+ self.print_result(results)
+ self.assertEqual(1, results.hits)
+ self.assertEqual("Header", results.docs[0]["content"])
+
def suite():
return unittest.makeSuite(TicketIndexerSilenceOnExceptionTestCase, 'test')
Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/wiki_search.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/wiki_search.py?rev=1447721&r1=1447720&r2=1447721&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/wiki_search.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/search_resources/wiki_search.py Tue Feb 19 13:54:34 2013
@@ -128,6 +128,16 @@ class WikiIndexerEventsTestCase(BaseBloo
self.assertEqual(1, results.hits)
self.assertEqual("version1", results.docs[0]["content"])
+ def test_can_strip_wiki_formatting(self):
+ #arrange
+ self.insert_wiki(self.DUMMY_PAGE_NAME, " = Header")
+ #act
+ results = self.search_api.query("*:*")
+ #assert
+ self.print_result(results)
+ self.assertEqual(1, results.hits)
+ self.assertEqual("Header", results.docs[0]["content"])
+
def suite():
test_suite = unittest.TestSuite()
test_suite.addTest(unittest.makeSuite(
Re: svn commit: r1447721 - in /incubator/bloodhound/trunk/bloodhound_search/bhsearch:
./ search_resources/ tests/ tests/search_resources/
Posted by Andrej Golcov <an...@digiverse.si>.
Hi,
If you have been trying out Bloodhound Search, please rebuild the search index
in order to get rid of wiki syntax in the search results.
You can rebuild the index by running the trac-admin tool:
trac-admin <path_to_trac_environment> bhsearch rebuild
As usual, any feedback is highly appreciated :)
Regards, Andrej
On 19 February 2013 14:54, <an...@apache.org> wrote:
> Author: andrej
> Date: Tue Feb 19 13:54:34 2013
> New Revision: 1447721
>
> URL: http://svn.apache.org/r1447721
> Log:
> #389 Strip wiki formatting from the Bloodhound Search results
[...]