You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/02/04 10:30:09 UTC
svn commit: r1067121 - in /lucene/dev/branches/branch_3x: ./ lucene/ solr/ solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java

Author: uschindler
Date: Fri Feb  4 09:30:09 2011
New Revision: 1067121

URL: http://svn.apache.org/viewvc?rev=1067121&view=rev
Log:
SOLR-96: Add test case

Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/solr/   (props changed)
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java

Modified: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java?rev=1067121&r1=1067120&r2=1067121&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java Fri Feb  4 09:30:09 2011
@@ -30,8 +30,12 @@ import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
-import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.Reader;
 
 /**
  * A test for {@link DocumentAnalysisRequestHandler}.
@@ -71,15 +75,14 @@ public class DocumentAnalysisRequestHand
                     "</doc>" +
                     "</docs>";
 
-    final List<ContentStream> contentStreams = new ArrayList<ContentStream>(1);
-    contentStreams.add(new ContentStreamBase.StringStream(docsInput));
+    final ContentStream cs = new ContentStreamBase.StringStream(docsInput);
     ModifiableSolrParams params = new ModifiableSolrParams();
     params.add("analysis.query", "The Query String");
     params.add("analysis.showmatch", "true");
     SolrQueryRequest req = new SolrQueryRequestBase(h.getCore(), params) {
       @Override
       public Iterable<ContentStream> getContentStreams() {
-        return contentStreams;
+        return Collections.singleton(cs);
       }
     };
 
@@ -106,6 +109,94 @@ public class DocumentAnalysisRequestHand
     req.close();
   }
 
+  /** A binary-only ContentStream */
+  static class ByteStream extends ContentStreamBase {
+    private final byte[] bytes;
+    
+    public ByteStream(byte[] bytes, String contentType) {
+      this.bytes = bytes; 
+      this.contentType = contentType;
+      name = null;
+      size = Long.valueOf(bytes.length);
+      sourceInfo = "rawBytes";
+    }
+
+    public InputStream getStream() throws IOException {
+      return new ByteArrayInputStream(bytes);
+    }
+
+    @Override
+    public Reader getReader() throws IOException {
+      throw new IOException("This is a byte stream, Readers are not supported.");
+    }
+  }
+
+  
+  // This test should also test charset detection in UpdateRequestHandler,
+  // but the DocumentAnalysisRequestHandler is simpler to use/check.
+  @Test
+  public void testCharsetInDocument() throws Exception {
+    final byte[] xmlBytes = (
+      "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\r\n" +
+      "<docs>\r\n" +
+      " <doc>\r\n" +
+      "  <field name=\"id\">Müller</field>\r\n" +
+      " </doc>" +
+      "</docs>"
+    ).getBytes("ISO-8859-1");
+    
+    // we declare a content stream without charset:
+    final ContentStream cs = new ByteStream(xmlBytes, "application/xml");
+    
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    SolrQueryRequest req = new SolrQueryRequestBase(h.getCore(), params) {
+      @Override
+      public Iterable<ContentStream> getContentStreams() {
+        return Collections.singleton(cs);
+      }
+    };
+
+    DocumentAnalysisRequest request = handler.resolveAnalysisRequest(req);
+    assertNotNull(request);
+    final List<SolrInputDocument> documents = request.getDocuments();
+    assertNotNull(documents);
+    assertEquals(1, documents.size());
+    SolrInputDocument doc = documents.get(0);
+    assertEquals("Müller", doc.getField("id").getValue());
+  }
+
+  // This test should also test charset detection in UpdateRequestHandler,
+  // but the DocumentAnalysisRequestHandler is simpler to use/check.
+  @Test
+  public void testCharsetOutsideDocument() throws Exception {
+    final byte[] xmlBytes = (
+      "<docs>\r\n" +
+      " <doc>\r\n" +
+      "  <field name=\"id\">Müller</field>\r\n" +
+      " </doc>" +
+      "</docs>"
+    ).getBytes("ISO-8859-1");
+    
+    // we declare a content stream with charset:
+    final ContentStream cs = new ByteStream(xmlBytes, "application/xml; charset=ISO-8859-1");
+    
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    SolrQueryRequest req = new SolrQueryRequestBase(h.getCore(), params) {
+      @Override
+      public Iterable<ContentStream> getContentStreams() {
+        return Collections.singleton(cs);
+      }
+    };
+
+    DocumentAnalysisRequest request = handler.resolveAnalysisRequest(req);
+    assertNotNull(request);
+    final List<SolrInputDocument> documents = request.getDocuments();
+    assertNotNull(documents);
+    assertEquals(1, documents.size());
+    SolrInputDocument doc = documents.get(0);
+    assertEquals("Müller", doc.getField("id").getValue());
+  }
+
   /**
    * Tests the {@link DocumentAnalysisRequestHandler#handleAnalysisRequest(org.apache.solr.client.solrj.request.DocumentAnalysisRequest,
    * org.apache.solr.schema.IndexSchema)}