You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bloodhound.apache.org by sa...@apache.org on 2014/02/09 00:01:51 UTC
svn commit: r1566160 - in /bloodhound/trunk/bloodhound_search:
bhsearch/tests/whoosh_backend.py bhsearch/whoosh_backend.py setup.py
Author: saintgermain
Date: Sat Feb 8 23:01:51 2014
New Revision: 1566160
URL: http://svn.apache.org/r1566160
Log:
Update Whoosh dependency to 2.5.1 or later. Refs #741.
- Remove the Bloodhound Whoosh workaround/fix (has been fixed in Whoosh 2.5.1)
- Update the tests
- Whoosh 'score' is now a float instead of a string
- Detection of the fix is not necessary anymore
- Update the Whoosh dependency in bloodhound_search setup
Modified:
bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py
bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py
bloodhound/trunk/bloodhound_search/setup.py
Modified: bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py
URL: http://svn.apache.org/viewvc/bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py?rev=1566160&r1=1566159&r2=1566160&view=diff
==============================================================================
--- bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py (original)
+++ bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py Sat Feb 8 23:01:51 2014
@@ -28,8 +28,7 @@ from bhsearch.api import ASC, DESC, SCOR
from bhsearch.query_parser import DefaultQueryParser
from bhsearch.tests import unittest
from bhsearch.tests.base import BaseBloodhoundSearchTest
-from bhsearch.whoosh_backend import WhooshBackend, \
- WhooshEmptyFacetErrorWorkaround
+from bhsearch.whoosh_backend import WhooshBackend
from whoosh import index, query, sorting
from whoosh.fields import ID, KEYWORD, TEXT, Schema
from whoosh.qparser import MultifieldParser, MultifieldPlugin, PhrasePlugin, \
@@ -54,12 +53,12 @@ class WhooshBackendTestCase(BaseBloodhou
self.assertEqual(2, result.hits)
docs = result.docs
self.assertEqual(
- {'id': u'1', 'type': u'ticket', 'unique_id': u'empty:ticket:1',
- 'score': u'1'},
+ {'id': u'1', 'type': u'ticket', 'unique_id': u'ticket:1',
+ 'score': 0},
docs[0])
self.assertEqual(
- {'id': u'2', 'type': u'ticket', 'unique_id': u'empty:ticket:2',
- 'score': u'2'},
+ {'id': u'2', 'type': u'ticket', 'unique_id': u'ticket:2',
+ 'score': 1},
docs[1])
def test_can_return_all_fields(self):
@@ -68,7 +67,7 @@ class WhooshBackendTestCase(BaseBloodhou
self.print_result(result)
docs = result.docs
self.assertEqual(
- {'id': u'1', 'type': u'ticket', 'unique_id': u'empty:ticket:1',
+ {'id': u'1', 'type': u'ticket', 'unique_id': u'ticket:1',
"score": 1.0},
docs[0])
@@ -476,43 +475,6 @@ class WhooshFunctionalityTestCase(unitte
{'status': {None: 1, 'New': 1}, 'type': {'type1': 1, 'type2': 1}},
facets)
- def test_out_of_range_on_empty_facets(self):
- """
- Whoosh raises exception IndexError: list index out of range
- when search contains facets on field that is missing in at least one
- document in the index. The error manifests only when index contains
- more than one segment
-
- The problem expected to be fixed in the next release.
-
- For the time of being, whoosh-backend have to introduce workaround in
- order to fix the problem. This unit-test is just a reminder to remove
- workaround when the fixed version of Whoosh is applied.
- """
- schema = Schema(
- unique_id=ID(stored=True, unique=True),
- status=ID(stored=True),
- )
-
-# ix = RamStorage().create_index(schema)
- ix = index.create_in(self.index_dir, schema=schema)
- def insert_docs():
- with ix.writer() as w:
- for i in range(10):
- w.add_document(unique_id=unicode(i))
-
- #the problem occurs only when index contains more than one segment
- insert_docs()
- insert_docs()
-
- with ix.searcher() as s:
- with self.assertRaises(IndexError):
- s.search(
- query.Every(),
- groupedby=(u"status"),
- maptype=sorting.Count,
- )
-
def _load_facets(self, non_paged_results):
facet_names = non_paged_results.facet_names()
if not facet_names:
@@ -602,56 +564,10 @@ class WhooshFunctionalityTestCase(unitte
self.assertEquals(len(r), 0)
-class WhooshEmptyFacetErrorWorkaroundTestCase(BaseBloodhoundSearchTest):
- def setUp(self):
- super(WhooshEmptyFacetErrorWorkaroundTestCase, self).setUp()
- self.whoosh_backend = WhooshBackend(self.env)
- self.whoosh_backend.recreate_index()
- self.parser = DefaultQueryParser(self.env)
- self.empty_facet_workaround = WhooshEmptyFacetErrorWorkaround(self.env)
-
- def tearDown(self):
- shutil.rmtree(self.env.path)
- self.env.reset_db()
-
- def test_set_should_not_be_empty_fields(self):
- self.insert_ticket("test x")
- result = self.whoosh_backend.query(query.Every())
- self.print_result(result)
- doc = result.docs[0]
- null_marker = WhooshEmptyFacetErrorWorkaround.NULL_MARKER
- self.assertEqual(null_marker, doc["component"])
- self.assertEqual(null_marker, doc["status"])
- self.assertEqual(null_marker, doc["milestone"])
-
- def test_can_fix_query_filter(self):
- parsed_filter = self.parser.parse_filters(
- ["type:ticket", "NOT (milestone:*)"])
- query_parameters = dict(filter=parsed_filter)
- self.empty_facet_workaround.query_pre_process(
- query_parameters)
-
- result_filter = query_parameters["filter"]
- self.assertEquals('(type:ticket AND milestone:empty)',
- str(result_filter))
-
- def test_does_interfere_query_filter_if_not_needed(self):
- parsed_filter = self.parser.parse_filters(
- ["type:ticket", "milestone:aaa"])
- query_parameters = dict(filter=parsed_filter)
- self.empty_facet_workaround.query_pre_process(
- query_parameters)
-
- result_filter = query_parameters["filter"]
- self.assertEquals('(type:ticket AND milestone:aaa)',
- str(result_filter))
-
def suite():
test_suite = unittest.TestSuite()
test_suite.addTest(unittest.makeSuite(WhooshBackendTestCase, 'test'))
test_suite.addTest(unittest.makeSuite(WhooshFunctionalityTestCase, 'test'))
- test_suite.addTest(
- unittest.makeSuite(WhooshEmptyFacetErrorWorkaroundTestCase, 'test'))
return test_suite
if __name__ == '__main__':
Modified: bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py
URL: http://svn.apache.org/viewvc/bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py?rev=1566160&r1=1566159&r2=1566160&view=diff
==============================================================================
--- bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py (original)
+++ bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py Sat Feb 8 23:01:51 2014
@@ -39,10 +39,6 @@ from whoosh.collectors import FilterColl
from whoosh.writing import AsyncWriter
from datetime import datetime
-from bhsearch.whoosh_fixes import fixes_for
-for fix in fixes_for(whoosh.__version__):
- apply(fix)
-
UNIQUE_ID = "unique_id"
@@ -283,26 +279,11 @@ class WhooshBackend(Component):
searcher.collector = collector
def _create_unique_id(self, product, doc_type, doc_id):
- product, doc_type, doc_id = \
- self._apply_empty_facets_workaround(product, doc_type, doc_id)
-
if product:
return u"%s:%s:%s" % (product, doc_type, doc_id)
else:
return u"%s:%s" % (doc_type, doc_id)
- def _apply_empty_facets_workaround(self, product, doc_type, doc_id):
- # Apply the same workaround that is used at insertion time
- doc = {
- IndexFields.PRODUCT: product,
- IndexFields.TYPE: doc_type,
- IndexFields.ID: doc_id,
- }
- WhooshEmptyFacetErrorWorkaround(self.env).pre_process(doc)
- return (doc[IndexFields.PRODUCT],
- doc[IndexFields.TYPE],
- doc[IndexFields.ID])
-
def _to_whoosh_format(self, value):
if isinstance(value, basestring):
value = unicode(value)
@@ -472,90 +453,6 @@ class WhooshEmFormatter(whoosh.highlight
template = '<em>%(t)s</em>'
-class WhooshEmptyFacetErrorWorkaround(Component):
- """
- Whoosh 2.4.1 raises "IndexError: list index out of range"
- when search contains facets on field that is missing in at least one
- document in the index. The error manifests only when index contains
- more than one segment.
-
- The goal of this class is to temporary solve the problem for
- prototype phase. Fro non-prototype phase, the problem should be solved
- by the next version of Whoosh.
-
- Remove this class when fixed version of Whoosh is introduced.
- """
- implements(IDocIndexPreprocessor)
- implements(IResultPostprocessor)
- implements(IQueryPreprocessor)
-
- NULL_MARKER = u"empty"
-
- should_not_be_empty_fields = [
- IndexFields.STATUS,
- TicketFields.MILESTONE,
- TicketFields.COMPONENT,
- IndexFields.PRODUCT,
- ]
-
- #IDocIndexPreprocessor methods
- def pre_process(self, doc):
- for field in self.should_not_be_empty_fields:
- if field not in doc or doc[field] is None or doc[field] == empty:
- doc[field] = self.NULL_MARKER
-
- #IResultPostprocessor methods
- def post_process(self, query_result):
- #fix facets
- if query_result.facets:
- for count_dict in query_result.facets.values():
- for field, count in count_dict.iteritems():
- if field == self.NULL_MARKER:
- count_dict[None] = count
- del count_dict[self.NULL_MARKER]
-
- #fix query_result.docs
- for doc in query_result.docs:
- for field, value in doc.items():
- if value == self.NULL_MARKER:
- del doc[field]
-
- #IQueryPreprocessor methods
- def query_pre_process(self, query_parameters, context=None):
- """
- Go through filter queries and replace "NOT (field_name:*)" query with
- "field_name:NULL_MARKER" query.
-
- This is really quick fix to make prototype working with hope that
- the next Whoosh version will be released soon.
- """
- # pylint: disable=unused-argument
- if "filter" in query_parameters and query_parameters["filter"]:
- term_to_replace = \
- self._find_and_fix_condition(query_parameters["filter"])
- if term_to_replace:
- query_parameters["filter"] = term_to_replace
- if "query" in query_parameters and query_parameters["query"]:
- term_to_replace = \
- self._find_and_fix_condition(query_parameters["query"])
- if term_to_replace:
- query_parameters["query"] = term_to_replace
-
- def _find_and_fix_condition(self, filter_condition):
- if isinstance(filter_condition, whoosh.query.CompoundQuery):
- sub_queries = list(filter_condition.subqueries)
- for i, subquery in enumerate(sub_queries):
- term_to_replace = self._find_and_fix_condition(subquery)
- if term_to_replace:
- filter_condition.subqueries[i] = term_to_replace
- elif isinstance(filter_condition, whoosh.query.Not):
- not_query = filter_condition.query
- if isinstance(not_query, whoosh.query.Every) and \
- not_query.fieldname in self.should_not_be_empty_fields:
- return whoosh.query.Term(not_query.fieldname, self.NULL_MARKER)
- return None
-
-
class AdvancedFilterCollector(FilterCollector):
"""An advanced filter collector, accepting a callback function that
will be called for each document to determine whether it should be
Modified: bloodhound/trunk/bloodhound_search/setup.py
URL: http://svn.apache.org/viewvc/bloodhound/trunk/bloodhound_search/setup.py?rev=1566160&r1=1566159&r2=1566160&view=diff
==============================================================================
--- bloodhound/trunk/bloodhound_search/setup.py (original)
+++ bloodhound/trunk/bloodhound_search/setup.py Sat Feb 8 23:01:51 2014
@@ -147,7 +147,7 @@ setup(
install_requires = [
'setuptools>=0.6b1',
'Trac>=0.11',
- 'whoosh==2.4.1',
+ 'whoosh>=2.5.1',
],
package_dir = dict([p, i[0]] for p, i in PKG_INFO.iteritems()),
packages = PKG_INFO.keys(),