You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-user@lucene.apache.org by mark12345 <ma...@yahoo.com.au> on 2013/03/23 14:29:32 UTC
Enumeration - SolrServer.queryAndStreamResponse vs custom streaming
caching client.
What are the performance characteristics and implications of the SolrServer
class's queryAndStreamResponse method over large result sets (one hundred
thousand, one million records, etc.)?
http://lucene.apache.org/solr/4_2_0/solr-solrj/org/apache/solr/client/solrj/SolrServer.html#queryAndStreamResponse%28org.apache.solr.common.params.SolrParams,%20org.apache.solr.client.solrj.StreamingResponseCallback%29
<http://lucene.apache.org/solr/4_2_0/solr-solrj/org/apache/solr/client/solrj/SolrServer.html#queryAndStreamResponse%28org.apache.solr.common.params.SolrParams,%20org.apache.solr.client.solrj.StreamingResponseCallback%29>
Would it be better to create a custom streaming caching client, such as
the following SolrDocumentStreamingEnumeration class?
> package test;
>
> import java.util.Collections;
> import java.util.Enumeration;
> import java.util.LinkedList;
> import java.util.List;
>
> import org.apache.solr.client.solrj.SolrQuery;
> import org.apache.solr.client.solrj.SolrServerException;
> import org.apache.solr.client.solrj.impl.HttpSolrServer;
> import org.apache.solr.client.solrj.response.QueryResponse;
> import org.apache.solr.common.SolrDocument;
> import org.apache.solr.common.SolrDocumentList;
>
> public class SolrDocumentStreamingEnumeration implements Enumeration
> <SolrDocument>
> {
>
> protected HttpSolrServer server;
>
> protected SolrQuery solrQuery;
> protected int solrQueryPosition;
>
> private List
> <SolrDocument>
> cache;
> protected int cacheSize;
>
> public SolrDocumentStreamingEnumeration(HttpSolrServer server,
> SolrQuery solrQuery, int cacheSize) {
>
> this.server = server;
> this.solrQuery = solrQuery;
> this.cacheSize = cacheSize;
>
> reset();
> }
>
> public synchronized void reset() {
> cache = Collections.synchronizedList(new LinkedList
> <SolrDocument>
> ());
> solrQueryPosition = 0;
> }
>
> @Override
> public synchronized boolean hasMoreElements() {
>
> manageCache();
>
> if (cache.size() < 1) {
> // end of stream reached
> return false;
> } else {
> return true;
> }
>
>
> }
>
> @Override
> public synchronized SolrDocument nextElement() {
>
> manageCache();
>
> if(cache.size() < 1) {
> return null;
> }
>
> return cache.remove(0);
> }
>
> protected synchronized boolean manageCache() throws RuntimeException {
>
> if (cache.size() > 1) {
> return true;
> }
>
> try {
> return updateCache();
> } catch (SolrServerException e) {
> throw new RuntimeException(e);
> }
> }
>
> protected synchronized boolean updateCache() throws
> SolrServerException {
>
> solrQuery.setStart(solrQueryPosition);
> solrQuery.setRows(cacheSize);
>
> QueryResponse queryResponse = server.query(solrQuery);
>
> if (queryResponse.getStatus() != 0) {
> return false;
> }
>
> SolrDocumentList currentDocumentList = queryResponse.getResults();
>
> boolean success = cache.addAll(currentDocumentList);
> if (!success) {
> return false;
> }
>
> // only move the position on success
> solrQueryPosition = solrQueryPosition + cacheSize;
> return true;
>
> }
>
>
> }
I have also included an example test class below for demonstration
purposes.
> package test;
>
> import java.util.ArrayList;
> import java.util.HashSet;
> import java.util.List;
> import java.util.Set;
>
> import junit.framework.Assert;
>
> import org.apache.solr.client.solrj.SolrQuery;
> import org.apache.solr.client.solrj.impl.HttpSolrServer;
> import org.apache.solr.common.SolrDocument;
> import org.apache.solr.common.SolrInputDocument;
> import org.junit.After;
> import org.junit.Before;
> import org.junit.Test;
>
> import test.HttpSolrServerTestSupport;
>
> public class SolrDocumentStreamingEnumerationTest {
>
> private static HttpSolrServer server =
> HttpSolrServerTestSupport.getInstance().getServer();
>
> private List
> <SolrInputDocument>
> getDefaultSolrInputDocumentList() {
>
> List
> <SolrInputDocument>
> solrInputDocumentList = new ArrayList
> <SolrInputDocument>
> ();
>
> {
> SolrInputDocument solrInputDocument = new SolrInputDocument();
> solrInputDocument.addField("id", "1");
> solrInputDocument.addField("contents_s", "ONE");
> solrInputDocumentList.add(solrInputDocument);
> }
>
> {
> SolrInputDocument solrInputDocument = new SolrInputDocument();
> solrInputDocument.addField("id", "2");
> solrInputDocument.addField("contents_s", "TWO");
> solrInputDocumentList.add(solrInputDocument);
> }
>
> {
> SolrInputDocument solrInputDocument = new SolrInputDocument();
> solrInputDocument.addField("id", "3");
> solrInputDocument.addField("contents_s", "THREE");
> solrInputDocumentList.add(solrInputDocument);
> }
>
> return solrInputDocumentList;
>
> }
>
> private SolrQuery getDefaultSolrQuery() {
> SolrQuery solrQuery = new SolrQuery();
> solrQuery.setQuery("*:*");
> solrQuery.addFilterQuery("id:1 OR id:2 OR id:3");
> return solrQuery;
> }
>
> private void removeTestSolrDocumentsIfTheyExist() throws Exception {
> server.deleteById("1");
> server.deleteById("2");
> server.deleteById("3");
> server.commit();
> }
>
> @Before
> public void setUp() throws Exception {
> removeTestSolrDocumentsIfTheyExist();
> server.add(getDefaultSolrInputDocumentList());
> server.commit();
> }
>
>
> @After
> public void tearDown() throws Exception {
> removeTestSolrDocumentsIfTheyExist();
> }
>
>
> private void testEnumeration(
> SolrDocumentStreamingEnumeration beanEnumeration, Integer
> maxElementCountToRetrieve) {
>
> Set
> <String>
> beanIds = new HashSet
> <String>
> ();
>
> int loopCount = 0;
> while (beanEnumeration.hasMoreElements()) {
> loopCount++;
> SolrDocument doc = beanEnumeration.nextElement();
> beanIds.add(String.valueOf(doc.get("id")));
>
> if (maxElementCountToRetrieve != null &&
> maxElementCountToRetrieve.intValue() == loopCount ){
> break;
> }
> }
>
> Assert.assertTrue(beanIds.size() > 0);
> Assert.assertEquals(loopCount, beanIds.size());
> }
>
> private void testEnumerationAtEnd(SolrDocumentStreamingEnumeration
> beanEnumeration ) {
>
> // Check that end of enumeration has been reached
> for (int i = 0; i < 100; i++) {
> Assert.assertFalse(beanEnumeration.hasMoreElements());
> Assert.assertNull(beanEnumeration.nextElement());
> }
> }
>
>
> @Test
> public void testEnumeration() {
>
> SolrQuery solrQuery = getDefaultSolrQuery();
> final int cacheSize = 1;
>
> SolrDocumentStreamingEnumeration beanEnumeration =
> new SolrDocumentStreamingEnumeration(
> server,
> solrQuery, cacheSize);
>
> testEnumeration(beanEnumeration, null);
>
> testEnumerationAtEnd(beanEnumeration);
>
> }
>
> @Test
> public void testReset() {
>
> SolrQuery solrQuery = getDefaultSolrQuery();
>
> final int cacheSize = 10;
>
> SolrDocumentStreamingEnumeration beanEnumeration =
> new SolrDocumentStreamingEnumeration(
> server, solrQuery, cacheSize);
>
>
> testEnumeration(beanEnumeration, null);
>
> beanEnumeration.reset();
>
> testEnumeration(beanEnumeration, null);
>
> Assert.assertFalse(beanEnumeration.hasMoreElements());
> Assert.assertNull(beanEnumeration.nextElement());
>
> beanEnumeration.reset();
>
> Assert.assertTrue(beanEnumeration.hasMoreElements());
> Assert.assertNotNull(beanEnumeration.nextElement());
>
> }
>
> @Test
> public void testLargeCache() {
>
> SolrQuery solrQuery = getDefaultSolrQuery();
>
> final int cacheSize = 1000000000; // 1 Billion
>
> SolrDocumentStreamingEnumeration beanEnumeration =
> new SolrDocumentStreamingEnumeration(
> server, solrQuery, cacheSize);
>
>
> testEnumeration(beanEnumeration, null);
>
> testEnumerationAtEnd(beanEnumeration);
>
> }
>
>
> }
--
View this message in context: http://lucene.472066.n3.nabble.com/Enumeration-SolrServer-queryAndStreamResponse-vs-custom-streaming-caching-client-tp4050743.html
Sent from the Solr - User mailing list archive at Nabble.com.