You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mh...@apache.org on 2007/01/05 01:05:17 UTC

svn commit: r492823 - in /lucene/java/trunk/contrib/xml-query-parser/src: java/org/apache/lucene/xmlparser/ java/org/apache/lucene/xmlparser/builders/ test/org/apache/lucene/xmlparser/

Author: mharwood
Date: Thu Jan  4 16:05:17 2007
New Revision: 492823

URL: http://svn.apache.org/viewvc?view=rev&rev=492823
Log:
Added new "CachedFilter" feature to XML syntax enabling any queries or filters to be cached for better repeat performance. Added JUnit test and example XML file. Also fixed ClassCastException in DOMUtils which occurred when getAttributeWithInheritance reached the root of a document without finding the required attribute.

Added:
    lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/CachedFilterBuilder.java
    lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/CachedFilter.xml
Modified:
    lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/CoreParser.java
    lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/DOMUtils.java
    lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/FilterBuilderFactory.java
    lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/QueryBuilderFactory.java
    lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java

Modified: lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/CoreParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/CoreParser.java?view=diff&rev=492823&r1=492822&r2=492823
==============================================================================
--- lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/CoreParser.java (original)
+++ lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/CoreParser.java Thu Jan  4 16:05:17 2007
@@ -12,6 +12,7 @@
 import org.apache.lucene.xmlparser.builders.ConstantScoreQueryBuilder;
 import org.apache.lucene.xmlparser.builders.FilteredQueryBuilder;
 import org.apache.lucene.xmlparser.builders.MatchAllDocsQueryBuilder;
+import org.apache.lucene.xmlparser.builders.CachedFilterBuilder;
 import org.apache.lucene.xmlparser.builders.RangeFilterBuilder;
 import org.apache.lucene.xmlparser.builders.SpanFirstBuilder;
 import org.apache.lucene.xmlparser.builders.SpanNearBuilder;
@@ -38,6 +39,9 @@
 	protected QueryParser parser;
 	protected QueryBuilderFactory queryFactory;
 	protected FilterBuilderFactory filterFactory;
+	//Controls the max size of the LRU cache used for the filters created by CachedFilter elements.
+	public static int maxNumCachedFilters=20;
+
 
 	public CoreParser(Analyzer analyzer, QueryParser parser)
 	{
@@ -55,6 +59,10 @@
 		queryFactory.addBuilder("UserQuery",new UserInputQueryBuilder(parser));
 		queryFactory.addBuilder("FilteredQuery",new FilteredQueryBuilder(filterFactory,queryFactory));
 		queryFactory.addBuilder("ConstantScoreQuery",new ConstantScoreQueryBuilder(filterFactory));
+		
+		filterFactory.addBuilder("CachedFilter",new CachedFilterBuilder(queryFactory,
+							filterFactory, maxNumCachedFilters));
+		
 		
 		SpanQueryBuilderFactory sqof=new SpanQueryBuilderFactory();
 

Modified: lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/DOMUtils.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/DOMUtils.java?view=diff&rev=492823&r1=492822&r2=492823
==============================================================================
--- lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/DOMUtils.java (original)
+++ lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/DOMUtils.java Thu Jan  4 16:05:17 2007
@@ -99,8 +99,12 @@
 			{
 				return null;
 			}
-			Element parent=(Element) n;
-			return getAttributeWithInheritance(parent,attributeName);
+			if(n instanceof Element)
+			{
+				Element parent=(Element) n;
+				return getAttributeWithInheritance(parent,attributeName);
+			}
+			return null; //we reached the top level of the document without finding attribute
 		}
 		return result;		
 	}
@@ -250,7 +254,7 @@
 		}
 
 		return doc;
-	}
+	}	
 }
 
 

Modified: lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/FilterBuilderFactory.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/FilterBuilderFactory.java?view=diff&rev=492823&r1=492822&r2=492823
==============================================================================
--- lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/FilterBuilderFactory.java (original)
+++ lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/FilterBuilderFactory.java Thu Jan  4 16:05:17 2007
@@ -27,5 +27,8 @@
 	{
 		builders.put(nodeName,builder);
 	}
-	
+	public FilterBuilder getFilterBuilder(String nodeName)
+	{
+		return (FilterBuilder) builders.get(nodeName);		
+	}	
 }

Modified: lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/QueryBuilderFactory.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/QueryBuilderFactory.java?view=diff&rev=492823&r1=492822&r2=492823
==============================================================================
--- lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/QueryBuilderFactory.java (original)
+++ lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/QueryBuilderFactory.java Thu Jan  4 16:05:17 2007
@@ -27,5 +27,9 @@
 	{
 		builders.put(nodeName,builder);
 	}
-
+	public QueryBuilder getQueryBuilder(String nodeName)
+	{
+		return (QueryBuilder) builders.get(nodeName);		
+	}
+	
 }

Added: lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/CachedFilterBuilder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/CachedFilterBuilder.java?view=auto&rev=492823
==============================================================================
--- lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/CachedFilterBuilder.java (added)
+++ lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/CachedFilterBuilder.java Thu Jan  4 16:05:17 2007
@@ -0,0 +1,123 @@
+/*
+ * Created on 25-Jan-2006
+ */
+package org.apache.lucene.xmlparser.builders;
+
+import java.util.Map.Entry;
+
+import org.apache.lucene.search.CachingWrapperFilter;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryFilter;
+import org.apache.lucene.xmlparser.DOMUtils;
+import org.apache.lucene.xmlparser.FilterBuilder;
+import org.apache.lucene.xmlparser.FilterBuilderFactory;
+import org.apache.lucene.xmlparser.ParserException;
+import org.apache.lucene.xmlparser.QueryBuilder;
+import org.apache.lucene.xmlparser.QueryBuilderFactory;
+import org.w3c.dom.Element;
+
+/**
+ * Filters are cached in an LRU Cache keyed on the contained query or filter object. Using this will 
+ * speed up overall performance for repeated uses of the same expensive query/filter. The sorts of 
+ * queries/filters likely to benefit from caching need not necessarily be complex - e.g. simple 
+ * TermQuerys with a large DF (document frequency) can be expensive on large indexes. 
+ * A good example of this might be a term query on a field with only 2 possible values - 
+ * "true" or "false". In a large index, querying or filtering on this field requires reading 
+ * millions of document ids from disk which can more usefully be cached as a filter bitset.
+ * 
+ * For Queries/Filters to be cached and reused the object must implement the hashCode and
+ * equals methods correctly so that duplicate queries/filters can be detected in the cache.
+ * 
+ * The CoreParser.maxNumCachedFilters property can be used to control the size of the LRU 
+ * Cache established during the construction of CoreParser instances.
+ * 
+ * @author maharwood
+ */
+public class CachedFilterBuilder implements FilterBuilder {
+
+	private QueryBuilderFactory queryFactory;
+	private FilterBuilderFactory filterFactory;
+	
+    private  LRUCache filterCache = null;
+
+	private int cacheSize;
+
+	public CachedFilterBuilder(QueryBuilderFactory queryFactory, 
+			FilterBuilderFactory filterFactory,int cacheSize)
+	{
+		this.queryFactory=queryFactory;
+		this.filterFactory=filterFactory;
+		this.cacheSize=cacheSize;
+	}
+
+	public Filter getFilter(Element e) throws ParserException
+	{
+
+		Element childElement = DOMUtils.getFirstChildOrFail(e);
+
+		if (filterCache == null)
+		{
+			filterCache = new LRUCache(cacheSize);
+		}
+
+		// Test to see if child Element is a query or filter that needs to be
+		// cached
+		QueryBuilder qb = queryFactory.getQueryBuilder(childElement
+				.getNodeName());
+		Object cacheKey = null;
+		Query q = null;
+		Filter f = null;
+		if (qb != null)
+		{
+			q = qb.getQuery(childElement);
+			cacheKey = q;
+		} else
+		{
+			f = filterFactory.getFilter(childElement);
+			cacheKey = f;
+		}
+		Filter cachedFilter = null;
+		synchronized (filterCache)
+		{ // check cache
+			cachedFilter = (Filter) filterCache.get(cacheKey);
+			if (cachedFilter != null)
+			{
+				return cachedFilter; // cache hit
+			}
+		}
+		
+		//cache miss
+		if (qb != null)
+		{
+			cachedFilter = new QueryFilter(q);
+		} else
+		{
+			cachedFilter = new CachingWrapperFilter(f);
+		}
+
+		synchronized (filterCache)
+		{ // update cache
+			filterCache.put(cacheKey, cachedFilter);
+		}
+		return cachedFilter;
+	}
+	
+	static class LRUCache extends java.util.LinkedHashMap
+	{
+	    public LRUCache(int maxsize)
+	    {
+	        super(maxsize * 4 / 3 + 1, 0.75f, true);
+	        this.maxsize = maxsize;
+	    }
+
+	    protected int maxsize;
+
+	    protected boolean removeEldestEntry(Entry eldest)
+	    {
+	        return size() > maxsize;
+	    }
+
+	}
+
+}

Added: lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/CachedFilter.xml
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/CachedFilter.xml?view=auto&rev=492823
==============================================================================
--- lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/CachedFilter.xml (added)
+++ lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/CachedFilter.xml Thu Jan  4 16:05:17 2007
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<FilteredQuery>
+	<Query>
+		<BooleanQuery fieldName="contents">
+			<Clause occurs="should">
+				<TermQuery>merger</TermQuery>
+			</Clause>
+			<Clause occurs="mustnot">
+				<TermQuery >sumitomo</TermQuery>		
+			</Clause>
+		</BooleanQuery>
+	</Query>
+	
+	<Filter>
+		<!--
+			CachedFilter elements can contain any Query or Filter. 
+			CachedFilters are cached in an LRU Cache keyed on the contained query/filter object. 
+			Using this will speed up overall performance for repeated uses of the same expensive 
+			query/filter. The sorts of queries likely to benefit from caching need not necessarily be 
+			complex - e.g. simple TermQuerys with a large DF (document frequency) can be expensive
+			on large indexes. A good example of this might be a term query on a field with only 2 possible 
+			values - "true" or "false". In a large index, querying or filtering on this field requires 
+			reading millions of document ids from disk which can more usefully be cached as a 
+			QueryFilter bitset.
+			
+			For Queries/Filters to be cached and reused the object must implement the hashCode and
+			equals methods correctly so that duplicate queries/filters can be detected in the cache.
+			
+			The CoreParser.maxNumCachedFilters property can be used to control the size
+			of the LRU Cache established during the construction of CoreParser instances.
+			-->
+		<CachedFilter>
+			<!-- Example query to be cached for fast, repeated use -->
+			<TermQuery fieldName="contents">bank</TermQuery> 
+			<!-- Alternatively, a filter object can be cached ....
+				<RangeFilter fieldName="date" lowerTerm="19870409" upperTerm="19870412"/>
+			-->				
+		</CachedFilter>
+	</Filter>
+	
+</FilteredQuery>

Modified: lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java?view=diff&rev=492823&r1=492822&r2=492823
==============================================================================
--- lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java (original)
+++ lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java Thu Jan  4 16:05:17 2007
@@ -155,6 +155,11 @@
 			Query q=parse("NestedBooleanQuery.xml");
 			dumpResults("Nested Boolean query", q, 5);
 	}
+	public void testCachedFilterXML() throws ParserException, IOException
+	{
+			Query q=parse("CachedFilter.xml");
+			dumpResults("Cached filter", q, 5);
+	}