You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bloodhound.apache.org by sa...@apache.org on 2014/02/09 00:01:51 UTC

svn commit: r1566160 - in /bloodhound/trunk/bloodhound_search: bhsearch/tests/whoosh_backend.py bhsearch/whoosh_backend.py setup.py

Author: saintgermain
Date: Sat Feb  8 23:01:51 2014
New Revision: 1566160

URL: http://svn.apache.org/r1566160
Log:
Update Whoosh dependency to 2.5.1 or later. Refs #741.

- Remove the Bloodhound Whoosh workaround/fix (has been fixed in Whoosh 2.5.1)
- Update the tests
  - Whoosh 'score' is now a float instead of a string
  - Detecting whether the fix is present is no longer necessary
- Update the Whoosh dependency in bloodhound_search setup

Modified:
    bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py
    bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py
    bloodhound/trunk/bloodhound_search/setup.py

Modified: bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py
URL: http://svn.apache.org/viewvc/bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py?rev=1566160&r1=1566159&r2=1566160&view=diff
==============================================================================
--- bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py (original)
+++ bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py Sat Feb  8 23:01:51 2014
@@ -28,8 +28,7 @@ from bhsearch.api import ASC, DESC, SCOR
 from bhsearch.query_parser import DefaultQueryParser
 from bhsearch.tests import unittest
 from bhsearch.tests.base import BaseBloodhoundSearchTest
-from bhsearch.whoosh_backend import WhooshBackend, \
-    WhooshEmptyFacetErrorWorkaround
+from bhsearch.whoosh_backend import WhooshBackend
 from whoosh import index, query, sorting
 from whoosh.fields import ID, KEYWORD, TEXT, Schema
 from whoosh.qparser import MultifieldParser, MultifieldPlugin, PhrasePlugin, \
@@ -54,12 +53,12 @@ class WhooshBackendTestCase(BaseBloodhou
         self.assertEqual(2, result.hits)
         docs = result.docs
         self.assertEqual(
-            {'id': u'1', 'type': u'ticket', 'unique_id': u'empty:ticket:1',
-             'score': u'1'},
+            {'id': u'1', 'type': u'ticket', 'unique_id': u'ticket:1',
+             'score': 0},
             docs[0])
         self.assertEqual(
-            {'id': u'2', 'type': u'ticket', 'unique_id': u'empty:ticket:2',
-             'score': u'2'},
+            {'id': u'2', 'type': u'ticket', 'unique_id': u'ticket:2',
+             'score': 1},
             docs[1])
 
     def test_can_return_all_fields(self):
@@ -68,7 +67,7 @@ class WhooshBackendTestCase(BaseBloodhou
         self.print_result(result)
         docs = result.docs
         self.assertEqual(
-            {'id': u'1', 'type': u'ticket', 'unique_id': u'empty:ticket:1',
+            {'id': u'1', 'type': u'ticket', 'unique_id': u'ticket:1',
                 "score": 1.0},
             docs[0])
 
@@ -476,43 +475,6 @@ class WhooshFunctionalityTestCase(unitte
             {'status': {None: 1, 'New': 1}, 'type': {'type1': 1, 'type2': 1}},
             facets)
 
-    def test_out_of_range_on_empty_facets(self):
-        """
-        Whoosh raises exception IndexError: list index out of range
-        when search contains facets on field that is missing in at least one
-        document in the index. The error manifests only when index contains
-        more than one segment
-
-        The problem expected to be fixed in the next release.
-
-        For the time of being, whoosh-backend have to introduce workaround in
-        order to fix the problem. This unit-test is just a reminder to remove
-        workaround when the fixed version of Whoosh is applied.
-        """
-        schema = Schema(
-                unique_id=ID(stored=True, unique=True),
-                status=ID(stored=True),
-                )
-
-#        ix = RamStorage().create_index(schema)
-        ix = index.create_in(self.index_dir, schema=schema)
-        def insert_docs():
-            with ix.writer() as w:
-                for i in range(10):
-                    w.add_document(unique_id=unicode(i))
-
-        #the problem occurs only when index contains more than one segment
-        insert_docs()
-        insert_docs()
-
-        with ix.searcher() as s:
-            with self.assertRaises(IndexError):
-                s.search(
-                    query.Every(),
-                    groupedby=(u"status"),
-                    maptype=sorting.Count,
-                )
-
     def _load_facets(self, non_paged_results):
         facet_names = non_paged_results.facet_names()
         if not facet_names:
@@ -602,56 +564,10 @@ class WhooshFunctionalityTestCase(unitte
         self.assertEquals(len(r), 0)
 
 
-class WhooshEmptyFacetErrorWorkaroundTestCase(BaseBloodhoundSearchTest):
-    def setUp(self):
-        super(WhooshEmptyFacetErrorWorkaroundTestCase, self).setUp()
-        self.whoosh_backend = WhooshBackend(self.env)
-        self.whoosh_backend.recreate_index()
-        self.parser = DefaultQueryParser(self.env)
-        self.empty_facet_workaround = WhooshEmptyFacetErrorWorkaround(self.env)
-
-    def tearDown(self):
-        shutil.rmtree(self.env.path)
-        self.env.reset_db()
-
-    def test_set_should_not_be_empty_fields(self):
-        self.insert_ticket("test x")
-        result = self.whoosh_backend.query(query.Every())
-        self.print_result(result)
-        doc = result.docs[0]
-        null_marker = WhooshEmptyFacetErrorWorkaround.NULL_MARKER
-        self.assertEqual(null_marker, doc["component"])
-        self.assertEqual(null_marker, doc["status"])
-        self.assertEqual(null_marker, doc["milestone"])
-
-    def test_can_fix_query_filter(self):
-        parsed_filter = self.parser.parse_filters(
-            ["type:ticket", "NOT (milestone:*)"])
-        query_parameters = dict(filter=parsed_filter)
-        self.empty_facet_workaround.query_pre_process(
-            query_parameters)
-
-        result_filter = query_parameters["filter"]
-        self.assertEquals('(type:ticket AND milestone:empty)',
-            str(result_filter))
-
-    def test_does_interfere_query_filter_if_not_needed(self):
-        parsed_filter = self.parser.parse_filters(
-            ["type:ticket", "milestone:aaa"])
-        query_parameters = dict(filter=parsed_filter)
-        self.empty_facet_workaround.query_pre_process(
-            query_parameters)
-
-        result_filter = query_parameters["filter"]
-        self.assertEquals('(type:ticket AND milestone:aaa)',
-            str(result_filter))
-
 def suite():
     test_suite = unittest.TestSuite()
     test_suite.addTest(unittest.makeSuite(WhooshBackendTestCase, 'test'))
     test_suite.addTest(unittest.makeSuite(WhooshFunctionalityTestCase, 'test'))
-    test_suite.addTest(
-        unittest.makeSuite(WhooshEmptyFacetErrorWorkaroundTestCase, 'test'))
     return test_suite
 
 if __name__ == '__main__':

Modified: bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py
URL: http://svn.apache.org/viewvc/bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py?rev=1566160&r1=1566159&r2=1566160&view=diff
==============================================================================
--- bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py (original)
+++ bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py Sat Feb  8 23:01:51 2014
@@ -39,10 +39,6 @@ from whoosh.collectors import FilterColl
 from whoosh.writing import AsyncWriter
 from datetime import datetime
 
-from bhsearch.whoosh_fixes import fixes_for
-for fix in fixes_for(whoosh.__version__):
-    apply(fix)
-
 UNIQUE_ID = "unique_id"
 
 
@@ -283,26 +279,11 @@ class WhooshBackend(Component):
         searcher.collector = collector
 
     def _create_unique_id(self, product, doc_type, doc_id):
-        product, doc_type, doc_id = \
-            self._apply_empty_facets_workaround(product, doc_type, doc_id)
-
         if product:
             return u"%s:%s:%s" % (product, doc_type, doc_id)
         else:
             return u"%s:%s" % (doc_type, doc_id)
 
-    def _apply_empty_facets_workaround(self, product, doc_type, doc_id):
-        # Apply the same workaround that is used at insertion time
-        doc = {
-            IndexFields.PRODUCT: product,
-            IndexFields.TYPE: doc_type,
-            IndexFields.ID: doc_id,
-        }
-        WhooshEmptyFacetErrorWorkaround(self.env).pre_process(doc)
-        return (doc[IndexFields.PRODUCT],
-                doc[IndexFields.TYPE],
-                doc[IndexFields.ID])
-
     def _to_whoosh_format(self, value):
         if isinstance(value, basestring):
             value = unicode(value)
@@ -472,90 +453,6 @@ class WhooshEmFormatter(whoosh.highlight
     template = '<em>%(t)s</em>'
 
 
-class WhooshEmptyFacetErrorWorkaround(Component):
-    """
-        Whoosh 2.4.1 raises "IndexError: list index out of range"
-        when search contains facets on field that is missing in at least one
-        document in the index. The error manifests only when index contains
-        more than one segment.
-
-        The goal of this class is to temporary solve the problem for
-        prototype phase. Fro non-prototype phase, the problem should be solved
-        by the next version of Whoosh.
-
-        Remove this class when fixed version of Whoosh is introduced.
-    """
-    implements(IDocIndexPreprocessor)
-    implements(IResultPostprocessor)
-    implements(IQueryPreprocessor)
-
-    NULL_MARKER = u"empty"
-
-    should_not_be_empty_fields = [
-        IndexFields.STATUS,
-        TicketFields.MILESTONE,
-        TicketFields.COMPONENT,
-        IndexFields.PRODUCT,
-    ]
-
-    #IDocIndexPreprocessor methods
-    def pre_process(self, doc):
-        for field in self.should_not_be_empty_fields:
-            if field not in doc or doc[field] is None or doc[field] == empty:
-                doc[field] = self.NULL_MARKER
-
-    #IResultPostprocessor methods
-    def post_process(self, query_result):
-        #fix facets
-        if query_result.facets:
-            for count_dict in query_result.facets.values():
-                for field, count in count_dict.iteritems():
-                    if field == self.NULL_MARKER:
-                        count_dict[None] = count
-                        del count_dict[self.NULL_MARKER]
-
-        #fix query_result.docs
-        for doc in query_result.docs:
-            for field, value in doc.items():
-                if value == self.NULL_MARKER:
-                    del doc[field]
-
-    #IQueryPreprocessor methods
-    def query_pre_process(self, query_parameters, context=None):
-        """
-        Go through filter queries and replace "NOT (field_name:*)" query with
-        "field_name:NULL_MARKER" query.
-
-        This is really quick fix to make prototype working with hope that
-        the next Whoosh version will be released soon.
-        """
-        # pylint: disable=unused-argument
-        if "filter" in query_parameters and query_parameters["filter"]:
-            term_to_replace = \
-                self._find_and_fix_condition(query_parameters["filter"])
-            if term_to_replace:
-                query_parameters["filter"] = term_to_replace
-        if "query" in query_parameters and query_parameters["query"]:
-            term_to_replace = \
-                self._find_and_fix_condition(query_parameters["query"])
-            if term_to_replace:
-                query_parameters["query"] = term_to_replace
-
-    def _find_and_fix_condition(self, filter_condition):
-        if isinstance(filter_condition, whoosh.query.CompoundQuery):
-            sub_queries = list(filter_condition.subqueries)
-            for i, subquery in enumerate(sub_queries):
-                term_to_replace = self._find_and_fix_condition(subquery)
-                if term_to_replace:
-                    filter_condition.subqueries[i] = term_to_replace
-        elif isinstance(filter_condition, whoosh.query.Not):
-            not_query = filter_condition.query
-            if isinstance(not_query, whoosh.query.Every) and \
-               not_query.fieldname in self.should_not_be_empty_fields:
-                return whoosh.query.Term(not_query.fieldname, self.NULL_MARKER)
-        return None
-
-
 class AdvancedFilterCollector(FilterCollector):
     """An advanced filter collector, accepting a callback function that
     will be called for each document to determine whether it should be

Modified: bloodhound/trunk/bloodhound_search/setup.py
URL: http://svn.apache.org/viewvc/bloodhound/trunk/bloodhound_search/setup.py?rev=1566160&r1=1566159&r2=1566160&view=diff
==============================================================================
--- bloodhound/trunk/bloodhound_search/setup.py (original)
+++ bloodhound/trunk/bloodhound_search/setup.py Sat Feb  8 23:01:51 2014
@@ -147,7 +147,7 @@ setup(
     install_requires = [
         'setuptools>=0.6b1',
         'Trac>=0.11',
-        'whoosh==2.4.1',
+        'whoosh>=2.5.1',
     ],
     package_dir = dict([p, i[0]] for p, i in PKG_INFO.iteritems()),
     packages = PKG_INFO.keys(),