You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-user@lucene.apache.org by mark12345 <ma...@yahoo.com.au> on 2013/03/23 14:29:32 UTC
Enumeration - SolrServer.queryAndStreamResponse vs custom streaming caching client.

What are the performance characteristics and implications of the SolrServer
class's queryAndStreamResponse method over large result sets (one hundred
thousand records, one million records, etc.)?

http://lucene.apache.org/solr/4_2_0/solr-solrj/org/apache/solr/client/solrj/SolrServer.html#queryAndStreamResponse%28org.apache.solr.common.params.SolrParams,%20org.apache.solr.client.solrj.StreamingResponseCallback%29
<http://lucene.apache.org/solr/4_2_0/solr-solrj/org/apache/solr/client/solrj/SolrServer.html#queryAndStreamResponse%28org.apache.solr.common.params.SolrParams,%20org.apache.solr.client.solrj.StreamingResponseCallback%29>  


Would it be better to create a custom streaming caching client, such as
the following SolrDocumentStreamingEnumeration class?



> package test;
> 
> import java.util.Collections;
> import java.util.Enumeration;
> import java.util.LinkedList;
> import java.util.List;
> 
> import org.apache.solr.client.solrj.SolrQuery;
> import org.apache.solr.client.solrj.SolrServerException;
> import org.apache.solr.client.solrj.impl.HttpSolrServer;
> import org.apache.solr.client.solrj.response.QueryResponse;
> import org.apache.solr.common.SolrDocument;
> import org.apache.solr.common.SolrDocumentList;
> 
> public class SolrDocumentStreamingEnumeration implements Enumeration
> <SolrDocument>
>  {
> 
>     protected HttpSolrServer server;
> 
>     protected SolrQuery solrQuery;
>     protected int solrQueryPosition;
> 
>     private List
> <SolrDocument>
>  cache;
>     protected int cacheSize;
> 
>     public SolrDocumentStreamingEnumeration(HttpSolrServer server,
> SolrQuery solrQuery, int cacheSize) {
> 
>         this.server = server;
>         this.solrQuery = solrQuery;
>         this.cacheSize = cacheSize;
> 
>         reset();
>     }
> 
>     public synchronized void reset() {
>         cache = Collections.synchronizedList(new LinkedList
> <SolrDocument>
> ());
>         solrQueryPosition = 0;
>     }
> 
>     @Override
>     public synchronized boolean hasMoreElements() {
> 
>         manageCache();
> 
>         if (cache.size() < 1) {
>             // end of stream reached
>             return false;
>         } else {
>             return true;
>         }
> 
> 
>     }
> 
>     @Override
>     public synchronized SolrDocument nextElement() {
> 
>         manageCache();
> 
>         if(cache.size() < 1) {
>             return null;
>         }
> 
>         return cache.remove(0);
>     }
> 
>     protected synchronized boolean manageCache() throws RuntimeException {
> 
>         if (cache.size() > 1) {
>             return true;
>         }
> 
>         try {
>             return updateCache();
>         } catch (SolrServerException e) {
>             throw new RuntimeException(e);
>         }
>     }
> 
>     protected synchronized boolean updateCache() throws
> SolrServerException {
> 
>         solrQuery.setStart(solrQueryPosition);
>         solrQuery.setRows(cacheSize);
> 
>         QueryResponse queryResponse = server.query(solrQuery);
> 
>         if (queryResponse.getStatus() != 0) {
>             return false;
>         }
> 
>         SolrDocumentList currentDocumentList = queryResponse.getResults();
> 
>         boolean success = cache.addAll(currentDocumentList);
>         if (!success) {
>             return false;
>         }
> 
>         // only move the position on success
>         solrQueryPosition = solrQueryPosition + cacheSize;
>         return true;
> 
>     }
> 
> 
> }


I have also included an example test class below for demonstration
purposes.


> package test;
> 
> import java.util.ArrayList;
> import java.util.HashSet;
> import java.util.List;
> import java.util.Set;
> 
> import junit.framework.Assert;
> 
> import org.apache.solr.client.solrj.SolrQuery;
> import org.apache.solr.client.solrj.impl.HttpSolrServer;
> import org.apache.solr.common.SolrDocument;
> import org.apache.solr.common.SolrInputDocument;
> import org.junit.After;
> import org.junit.Before;
> import org.junit.Test;
> 
> import test.HttpSolrServerTestSupport;
> 
> /**
>  * Integration tests for SolrDocumentStreamingEnumeration.
>  *
>  * <p>The tests talk to the server returned by HttpSolrServerTestSupport.
>  * Before each test the documents with ids 1, 2 and 3 are (re)created, and
>  * they are deleted again afterwards.
>  */
> public class SolrDocumentStreamingEnumerationTest {
> 
>     private static final HttpSolrServer server =
>             HttpSolrServerTestSupport.getInstance().getServer();
> 
>     /** Ids of the fixture documents; must match the filter query below. */
>     private static final String[] TEST_IDS = { "1", "2", "3" };
> 
>     /** Values of the contents_s field, index-aligned with TEST_IDS. */
>     private static final String[] TEST_CONTENTS = { "ONE", "TWO", "THREE" };
> 
>     /** Builds the fixture documents that are indexed before every test. */
>     private List<SolrInputDocument> getDefaultSolrInputDocumentList() {
> 
>         List<SolrInputDocument> solrInputDocumentList =
>                 new ArrayList<SolrInputDocument>();
> 
>         for (int i = 0; i < TEST_IDS.length; i++) {
>             SolrInputDocument solrInputDocument = new SolrInputDocument();
>             solrInputDocument.addField("id", TEST_IDS[i]);
>             solrInputDocument.addField("contents_s", TEST_CONTENTS[i]);
>             solrInputDocumentList.add(solrInputDocument);
>         }
> 
>         return solrInputDocumentList;
>     }
> 
>     /** Returns the ids every full enumeration is expected to produce. */
>     private Set<String> getExpectedIds() {
>         Set<String> expectedIds = new HashSet<String>();
>         for (String id : TEST_IDS) {
>             expectedIds.add(id);
>         }
>         return expectedIds;
>     }
> 
>     /** Builds a match-all query restricted to the three fixture documents. */
>     private SolrQuery getDefaultSolrQuery() {
>         SolrQuery solrQuery = new SolrQuery();
>         solrQuery.setQuery("*:*");
>         solrQuery.addFilterQuery("id:1 OR id:2 OR id:3");
>         return solrQuery;
>     }
> 
>     private void removeTestSolrDocumentsIfTheyExist() throws Exception {
>         for (String id : TEST_IDS) {
>             server.deleteById(id);
>         }
>         server.commit();
>     }
> 
>     @Before
>     public void setUp() throws Exception {
>         removeTestSolrDocumentsIfTheyExist();
>         server.add(getDefaultSolrInputDocumentList());
>         server.commit();
>     }
> 
>     @After
>     public void tearDown() throws Exception {
>         removeTestSolrDocumentsIfTheyExist();
>     }
> 
>     /**
>      * Consumes the enumeration and checks what it produced.
>      *
>      * @param beanEnumeration           enumeration to consume
>      * @param maxElementCountToRetrieve stop after this many documents, or
>      *                                  {@code null} to read to the end
>      */
>     private void assertEnumeratesTestDocuments(
>             SolrDocumentStreamingEnumeration beanEnumeration,
>             Integer maxElementCountToRetrieve) {
> 
>         Set<String> beanIds = new HashSet<String>();
> 
>         int loopCount = 0;
>         while (beanEnumeration.hasMoreElements()) {
>             loopCount++;
>             SolrDocument doc = beanEnumeration.nextElement();
>             Assert.assertNotNull(doc);
>             beanIds.add(String.valueOf(doc.get("id")));
> 
>             if (maxElementCountToRetrieve != null
>                     && maxElementCountToRetrieve.intValue() == loopCount) {
>                 break;
>             }
>         }
> 
>         // every document must have been delivered exactly once
>         Assert.assertEquals(loopCount, beanIds.size());
> 
>         if (maxElementCountToRetrieve == null) {
>             // A full pass must deliver all fixture documents, not just some;
>             // "size() > 0" alone would accept a truncated enumeration.
>             Assert.assertEquals(getExpectedIds(), beanIds);
>         } else {
>             Assert.assertTrue(beanIds.size() > 0);
>         }
>     }
> 
>     /** Checks that an exhausted enumeration keeps reporting its end. */
>     private void assertEnumerationAtEnd(
>             SolrDocumentStreamingEnumeration beanEnumeration) {
> 
>         for (int i = 0; i < 100; i++) {
>             Assert.assertFalse(beanEnumeration.hasMoreElements());
>             Assert.assertNull(beanEnumeration.nextElement());
>         }
>     }
> 
>     /** One document per page: every element needs its own request. */
>     @Test
>     public void testEnumeration() {
> 
>         final int cacheSize = 1;
> 
>         SolrDocumentStreamingEnumeration beanEnumeration =
>                 new SolrDocumentStreamingEnumeration(
>                         server, getDefaultSolrQuery(), cacheSize);
> 
>         assertEnumeratesTestDocuments(beanEnumeration, null);
> 
>         assertEnumerationAtEnd(beanEnumeration);
>     }
> 
>     /** Page size that does not divide the result count (pages of 2 and 1). */
>     @Test
>     public void testPageSizeNotDividingResultCount() {
> 
>         final int cacheSize = 2;
> 
>         SolrDocumentStreamingEnumeration beanEnumeration =
>                 new SolrDocumentStreamingEnumeration(
>                         server, getDefaultSolrQuery(), cacheSize);
> 
>         assertEnumeratesTestDocuments(beanEnumeration, null);
> 
>         assertEnumerationAtEnd(beanEnumeration);
>     }
> 
>     /** reset() must make the whole result set available again. */
>     @Test
>     public void testReset() {
> 
>         final int cacheSize = 10;
> 
>         SolrDocumentStreamingEnumeration beanEnumeration =
>                 new SolrDocumentStreamingEnumeration(
>                         server, getDefaultSolrQuery(), cacheSize);
> 
>         assertEnumeratesTestDocuments(beanEnumeration, null);
> 
>         beanEnumeration.reset();
> 
>         assertEnumeratesTestDocuments(beanEnumeration, null);
> 
>         Assert.assertFalse(beanEnumeration.hasMoreElements());
>         Assert.assertNull(beanEnumeration.nextElement());
> 
>         beanEnumeration.reset();
> 
>         Assert.assertTrue(beanEnumeration.hasMoreElements());
>         Assert.assertNotNull(beanEnumeration.nextElement());
>     }
> 
>     /** Page size far larger than the result set. */
>     @Test
>     public void testLargeCache() {
> 
>         final int cacheSize = 1000000000;  // 1 Billion
> 
>         SolrDocumentStreamingEnumeration beanEnumeration =
>                 new SolrDocumentStreamingEnumeration(
>                         server, getDefaultSolrQuery(), cacheSize);
> 
>         assertEnumeratesTestDocuments(beanEnumeration, null);
> 
>         assertEnumerationAtEnd(beanEnumeration);
>     }
> 
> }





--
View this message in context: http://lucene.472066.n3.nabble.com/Enumeration-SolrServer-queryAndStreamResponse-vs-custom-streaming-caching-client-tp4050743.html
Sent from the Solr - User mailing list archive at Nabble.com.