You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/02/04 10:30:09 UTC
svn commit: r1067121 - in /lucene/dev/branches/branch_3x: ./ lucene/ solr/
solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java
Author: uschindler
Date: Fri Feb 4 09:30:09 2011
New Revision: 1067121
URL: http://svn.apache.org/viewvc?rev=1067121&view=rev
Log:
SOLR-96: Add test case
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/solr/ (props changed)
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java
Modified: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java?rev=1067121&r1=1067120&r2=1067121&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java Fri Feb 4 09:30:09 2011
@@ -30,8 +30,12 @@ import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
-import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.Reader;
/**
* A test for {@link DocumentAnalysisRequestHandler}.
@@ -71,15 +75,14 @@ public class DocumentAnalysisRequestHand
"</doc>" +
"</docs>";
- final List<ContentStream> contentStreams = new ArrayList<ContentStream>(1);
- contentStreams.add(new ContentStreamBase.StringStream(docsInput));
+ final ContentStream cs = new ContentStreamBase.StringStream(docsInput);
ModifiableSolrParams params = new ModifiableSolrParams();
params.add("analysis.query", "The Query String");
params.add("analysis.showmatch", "true");
SolrQueryRequest req = new SolrQueryRequestBase(h.getCore(), params) {
@Override
public Iterable<ContentStream> getContentStreams() {
- return contentStreams;
+ return Collections.singleton(cs);
}
};
@@ -106,6 +109,94 @@ public class DocumentAnalysisRequestHand
req.close();
}
+ /** A binary-only ContentStream */
+ static class ByteStream extends ContentStreamBase {
+ private final byte[] bytes;
+
+ public ByteStream(byte[] bytes, String contentType) {
+ this.bytes = bytes;
+ this.contentType = contentType;
+ name = null;
+ size = Long.valueOf(bytes.length);
+ sourceInfo = "rawBytes";
+ }
+
+ public InputStream getStream() throws IOException {
+ return new ByteArrayInputStream(bytes);
+ }
+
+ @Override
+ public Reader getReader() throws IOException {
+ throw new IOException("This is a byte stream, Readers are not supported.");
+ }
+ }
+
+
+ // This test should also test charset detection in UpdateRequestHandler,
+ // but the DocumentAnalysisRequestHandler is simpler to use/check.
+ @Test
+ public void testCharsetInDocument() throws Exception {
+ final byte[] xmlBytes = (
+ "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\r\n" +
+ "<docs>\r\n" +
+ " <doc>\r\n" +
+ " <field name=\"id\">Müller</field>\r\n" +
+ " </doc>" +
+ "</docs>"
+ ).getBytes("ISO-8859-1");
+
+ // we declare a content stream without charset:
+ final ContentStream cs = new ByteStream(xmlBytes, "application/xml");
+
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ SolrQueryRequest req = new SolrQueryRequestBase(h.getCore(), params) {
+ @Override
+ public Iterable<ContentStream> getContentStreams() {
+ return Collections.singleton(cs);
+ }
+ };
+
+ DocumentAnalysisRequest request = handler.resolveAnalysisRequest(req);
+ assertNotNull(request);
+ final List<SolrInputDocument> documents = request.getDocuments();
+ assertNotNull(documents);
+ assertEquals(1, documents.size());
+ SolrInputDocument doc = documents.get(0);
+ assertEquals("Müller", doc.getField("id").getValue());
+ }
+
+ // This test should also test charset detection in UpdateRequestHandler,
+ // but the DocumentAnalysisRequestHandler is simpler to use/check.
+ @Test
+ public void testCharsetOutsideDocument() throws Exception {
+ final byte[] xmlBytes = (
+ "<docs>\r\n" +
+ " <doc>\r\n" +
+ " <field name=\"id\">Müller</field>\r\n" +
+ " </doc>" +
+ "</docs>"
+ ).getBytes("ISO-8859-1");
+
+ // we declare a content stream with the charset given in its content type (the XML itself has no encoding declaration):
+ final ContentStream cs = new ByteStream(xmlBytes, "application/xml; charset=ISO-8859-1");
+
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ SolrQueryRequest req = new SolrQueryRequestBase(h.getCore(), params) {
+ @Override
+ public Iterable<ContentStream> getContentStreams() {
+ return Collections.singleton(cs);
+ }
+ };
+
+ DocumentAnalysisRequest request = handler.resolveAnalysisRequest(req);
+ assertNotNull(request);
+ final List<SolrInputDocument> documents = request.getDocuments();
+ assertNotNull(documents);
+ assertEquals(1, documents.size());
+ SolrInputDocument doc = documents.get(0);
+ assertEquals("Müller", doc.getField("id").getValue());
+ }
+
/**
* Tests the {@link DocumentAnalysisRequestHandler#handleAnalysisRequest(org.apache.solr.client.solrj.request.DocumentAnalysisRequest,
* org.apache.solr.schema.IndexSchema)}