You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by yo...@apache.org on 2007/03/30 18:59:59 UTC
svn commit: r524175 - in /lucene/solr/trunk: ./ example/exampledocs/
example/solr/conf/ lib/ src/java/org/apache/solr/handler/
src/java/org/apache/solr/request/ src/java/org/apache/solr/util/
src/test/org/apache/solr/handler/ src/test/test-files/solr/c...
Author: yonik
Date: Fri Mar 30 09:59:58 2007
New Revision: 524175
URL: http://svn.apache.org/viewvc?view=rev&rev=524175
Log:
CSV updates: SOLR-66
Added:
lucene/solr/trunk/example/exampledocs/books.csv (with props)
lucene/solr/trunk/lib/commons-csv-0.1-SNAPSHOT.jar (with props)
lucene/solr/trunk/src/java/org/apache/solr/handler/CSVRequestHandler.java (with props)
lucene/solr/trunk/src/test/org/apache/solr/handler/TestCSVLoader.java (with props)
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/example/solr/conf/solrconfig.xml
lucene/solr/trunk/src/java/org/apache/solr/request/SolrParams.java
lucene/solr/trunk/src/java/org/apache/solr/util/TestHarness.java
lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml
Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?view=diff&rev=524175&r1=524174&r2=524175
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Fri Mar 30 09:59:58 2007
@@ -135,6 +135,8 @@
19. SOLR-197: New parameters for input: stream.contentType for specifying
or overriding the content type of input, and stream.file for reading
local files. (Ryan McKinley via yonik)
+
+20. SOLR-66: CSV data format for document additions and updates. (yonik)
Changes in runtime behavior
1. Highlighting using DisMax will only pick up terms from the main
Added: lucene/solr/trunk/example/exampledocs/books.csv
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/example/exampledocs/books.csv?view=auto&rev=524175
==============================================================================
--- lucene/solr/trunk/example/exampledocs/books.csv (added)
+++ lucene/solr/trunk/example/exampledocs/books.csv Fri Mar 30 09:59:58 2007
@@ -0,0 +1,11 @@
+id,cat,name,price,inStock,author_t,series_t,sequence_i,genre_s
+0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,"A Song of Ice and Fire",1,fantasy
+0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,"A Song of Ice and Fire",2,fantasy
+055357342X,book,A Storm of Swords,7.99,true,George R.R. Martin,"A Song of Ice and Fire",3,fantasy
+0553293354,book,Foundation,7.99,true,Isaac Asimov,Foundation Novels,1,scifi
+0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy
+0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi
+0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy
+0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy
+0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy
+080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy
Propchange: lucene/solr/trunk/example/exampledocs/books.csv
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/example/exampledocs/books.csv
------------------------------------------------------------------------------
svn:executable = *
Modified: lucene/solr/trunk/example/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/example/solr/conf/solrconfig.xml?view=diff&rev=524175&r1=524174&r2=524175
==============================================================================
--- lucene/solr/trunk/example/solr/conf/solrconfig.xml (original)
+++ lucene/solr/trunk/example/solr/conf/solrconfig.xml Fri Mar 30 09:59:58 2007
@@ -384,6 +384,11 @@
<!-- NOTE, /update is mapped to a servlet, we can have the filter handle requests off that! -->
<requestHandler name="/update/commit" class="solr.CommitRequestHandler" />
+
+ <!-- CSV update handler, loaded on demand -->
+ <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy">
+ </requestHandler>
+
<!-- queryResponseWriter plugins... query responses will be written using the
writer specified by the 'wt' request parameter matching the name of a registered
Added: lucene/solr/trunk/lib/commons-csv-0.1-SNAPSHOT.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/lib/commons-csv-0.1-SNAPSHOT.jar?view=auto&rev=524175
==============================================================================
Binary file - no diff available.
Propchange: lucene/solr/trunk/lib/commons-csv-0.1-SNAPSHOT.jar
------------------------------------------------------------------------------
svn:executable = *
Propchange: lucene/solr/trunk/lib/commons-csv-0.1-SNAPSHOT.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/solr/trunk/src/java/org/apache/solr/handler/CSVRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/handler/CSVRequestHandler.java?view=auto&rev=524175
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/handler/CSVRequestHandler.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/handler/CSVRequestHandler.java Fri Mar 30 09:59:58 2007
@@ -0,0 +1,386 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.handler;
+
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrParams;
+import org.apache.solr.request.SolrQueryResponse;
+import org.apache.solr.util.ContentStream;
+import org.apache.solr.core.SolrException;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.util.StrUtils;
+import org.apache.solr.update.*;
+import org.apache.commons.csv.CSVStrategy;
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.io.IOUtils;
+
+import java.util.regex.Pattern;
+import java.util.List;
+import java.io.*;
+
+/**
+ * @author yonik
+ * @version $Id$
+ */
+
+public class CSVRequestHandler extends RequestHandlerBase {
+
+ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
+ CSVLoader loader = new SingleThreadedCSVLoader(req);
+
+ Iterable<ContentStream> streams = req.getContentStreams();
+ if (streams == null) {
+ throw new SolrException(400, "missing content stream");
+ }
+
+ for(ContentStream stream : streams) {
+ Reader reader = stream.getReader();
+ try {
+ loader.errHeader = "CSVLoader: input=" + stream.getSourceInfo();
+ loader.load(reader);
+ } finally {
+ IOUtils.closeQuietly(reader);
+ }
+ }
+ }
+
+ //////////////////////// SolrInfoMBeans methods //////////////////////
+ @Override
+ public String getDescription() {
+ return "Add/Update multiple documents with CSV formatted rows";
+ }
+
+ @Override
+ public String getVersion() {
+ return "$Revision:$";
+ }
+
+ @Override
+ public String getSourceId() {
+ return "$Id:$";
+ }
+
+ @Override
+ public String getSource() {
+ return "$URL:$";
+ }
+}
+
+
+abstract class CSVLoader {
+ static String SEPARATOR="separator";
+ static String FIELDNAMES="fieldnames";
+ static String HEADER="header";
+ static String SKIP="skip";
+ static String MAP="map";
+ static String TRIM="trim";
+ static String EMPTY="keepEmpty";
+ static String SPLIT="split";
+ static String ENCAPSULATOR="encapsulator";
+ static String COMMIT="commit";
+ static String OVERWRITE="overwrite";
+
+ private static Pattern colonSplit = Pattern.compile(":");
+ private static Pattern commaSplit = Pattern.compile(",");
+
+ final IndexSchema schema;
+ final SolrParams params;
+ final UpdateHandler handler;
+ final CSVStrategy strategy;
+
+ String[] fieldnames;
+ SchemaField[] fields;
+ CSVLoader.FieldAdder[] adders;
+
+ int skipLines; // number of lines to skip at start of file
+
+ final AddUpdateCommand templateAdd;
+
+
+ /** Add a field to a document unless it's zero length.
+ * The FieldAdder hierarchy handles all the complexity of
+ * further transforming or splitting field values to keep the
+ * main logic loop clean. All implementations of add() must be
+ * MT-safe!
+ */
+ private class FieldAdder {
+ void add(DocumentBuilder builder, int line, int column, String val) {
+ if (val.length() > 0) {
+ builder.addField(fields[column].getName(),val,1.0f);
+ }
+ }
+ }
+
+ /** add zero length fields */
+ private class FieldAdderEmpty extends CSVLoader.FieldAdder {
+ void add(DocumentBuilder builder, int line, int column, String val) {
+ builder.addField(fields[column].getName(),val,1.0f);
+ }
+ }
+
+ /** trim fields */
+ private class FieldTrimmer extends CSVLoader.FieldAdder {
+ private final CSVLoader.FieldAdder base;
+ FieldTrimmer(CSVLoader.FieldAdder base) { this.base=base; }
+ void add(DocumentBuilder builder, int line, int column, String val) {
+ base.add(builder, line, column, val.trim());
+ }
+ }
+
+ /** map a single value.
+ * for just a couple of mappings, this is probably faster than
+ * using a HashMap.
+ */
+ private class FieldMapperSingle extends CSVLoader.FieldAdder {
+ private final String from;
+ private final String to;
+ private final CSVLoader.FieldAdder base;
+ FieldMapperSingle(String from, String to, CSVLoader.FieldAdder base) {
+ this.from=from;
+ this.to=to;
+ this.base=base;
+ }
+ void add(DocumentBuilder builder, int line, int column, String val) {
+ if (from.equals(val)) val=to;
+ base.add(builder,line,column,val);
+ }
+ }
+
+ /** Split a single value into multiple values based on
+ * a CSVStrategy.
+ */
+ private class FieldSplitter extends CSVLoader.FieldAdder {
+ private final CSVStrategy strategy;
+ private final CSVLoader.FieldAdder base;
+ FieldSplitter(CSVStrategy strategy, CSVLoader.FieldAdder base) {
+ this.strategy = strategy;
+ this.base = base;
+ }
+
+ /** Parse val as a single CSV line and hand each sub-value to the wrapped adder. */
+ void add(DocumentBuilder builder, int line, int column, String val) {
+ CSVParser parser = new CSVParser(new StringReader(val), strategy);
+ String[] vals;
+ try {
+ vals = parser.getLine();
+ } catch (IOException e) {
+ throw new SolrException(400, errHeader+", line="+line+", column="+column+": error splitting '"+val+"': "+e);
+ }
+ // no line was returned (presumably empty input): pass the raw value through unsplit
+ if (vals==null) vals = new String[]{val};
+ for (String v: vals) base.add(builder,line,column,v);
+ }
+ }
+
+
+ String errHeader="CSVLoader:";
+
+ /** Configures the loader from the request parameters: overwrite policy,
+ * CSV separator/encapsulator, and where the field names come from.
+ * When "fieldnames" is absent the names are read from the first input line,
+ * so explicitly passing header=false without fieldnames is rejected.
+ * @param req the update request supplying params, schema and update handler
+ * @throws SolrException (400) for an invalid separator, encapsulator or header setting
+ */
+ CSVLoader(SolrQueryRequest req) {
+ this.params = req.getParams();
+ handler = req.getCore().getUpdateHandler();
+ schema = req.getSchema();
+
+ // overwrite=true (the default) disallows duplicates and overwrites both
+ // committed and pending documents; overwrite=false does the opposite.
+ boolean overwrite = params.getBool(OVERWRITE,true);
+ templateAdd = new AddUpdateCommand();
+ templateAdd.allowDups = !overwrite;
+ templateAdd.overwriteCommitted = overwrite;
+ templateAdd.overwritePending = overwrite;
+
+ strategy = new CSVStrategy(',', '"', CSVStrategy.COMMENTS_DISABLED, true, false, true);
+ String sep = params.get(SEPARATOR);
+ if (sep!=null) {
+ if (sep.length()!=1) throw new SolrException(400,"Invalid separator:'"+sep+"'");
+ strategy.setDelimiter(sep.charAt(0));
+ }
+
+ String encapsulator = params.get(ENCAPSULATOR);
+ if (encapsulator!=null) {
+ if (encapsulator.length()!=1) throw new SolrException(400,"Invalid encapsulator:'"+encapsulator+"'");
+ strategy.setEncapsulator(encapsulator.charAt(0));
+ }
+
+ String fn = params.get(FIELDNAMES);
+ fieldnames = fn != null ? commaSplit.split(fn,-1) : null;
+
+ Boolean hasHeader = params.getBool(HEADER);
+
+ if (fieldnames==null) {
+ if (null == hasHeader) {
+ // assume the file has the headers if they aren't supplied in the args
+ hasHeader=true;
+ } else if (!hasHeader) {
+ throw new SolrException(400,"CSVLoader: must specify fieldnames=<fields>* or header=true");
+ }
+ } else {
+ // if the fieldnames were supplied and the file has a header, we need to
+ // skip over that header.
+ if (hasHeader!=null && hasHeader) skipLines=1;
+
+ prepareFields();
+ }
+ }
+
+ /** Look up each field name in the schema and build the FieldAdder chain (keepEmpty/map/trim/split) that indexes it */
+ void prepareFields() {
+ // Possible future optimization: for really rapid incremental indexing
+ // from a POST, one could cache all of this setup info based on the params.
+ // The link from FieldAdder to this would need to be severed for that to happen.
+
+ fields = new SchemaField[fieldnames.length];
+ adders = new CSVLoader.FieldAdder[fieldnames.length];
+ String skipStr = params.get(SKIP);
+ List<String> skipFields = skipStr==null ? null : StrUtils.splitSmart(skipStr,',');
+
+ CSVLoader.FieldAdder adder = new CSVLoader.FieldAdder();
+ CSVLoader.FieldAdder adderKeepEmpty = new CSVLoader.FieldAdderEmpty();
+
+ for (int i=0; i<fields.length; i++) {
+ String fname = fieldnames[i];
+ // to skip a field (empty name, or listed in the "skip" param), leave its entries in fields and adders null
+ if (fname.length()==0 || (skipFields!=null && skipFields.contains(fname))) continue;
+
+ fields[i] = schema.getField(fname);
+ boolean keepEmpty = params.getFieldBool(fname,EMPTY,false);
+ adders[i] = keepEmpty ? adderKeepEmpty : adder;
+
+ // Order that operations are applied: split -> trim -> map -> add
+ // so create in reverse order (each new adder wraps the previous adders[i]).
+ // Creation of FieldAdders could be optimized and shared among fields
+
+ String[] fmap = params.getFieldParams(fname,MAP);
+ if (fmap!=null) {
+ for (String mapRule : fmap) {
+ String[] mapArgs = colonSplit.split(mapRule,-1);
+ if (mapArgs.length!=2)
+ throw new SolrException(400, "Map rules must be of the form 'from:to' ,got '"+mapRule+"'");
+ adders[i] = new CSVLoader.FieldMapperSingle(mapArgs[0], mapArgs[1], adders[i]);
+ }
+ }
+
+ if (params.getFieldBool(fname,TRIM,false)) {
+ adders[i] = new CSVLoader.FieldTrimmer(adders[i]);
+ }
+
+ if (params.getFieldBool(fname,SPLIT,false)) {
+ String sepStr = params.getFieldParam(fname,SEPARATOR);
+ char fsep = sepStr==null || sepStr.length()==0 ? ',' : sepStr.charAt(0);
+ String encStr = params.getFieldParam(fname,ENCAPSULATOR);
+ char fenc = encStr==null || encStr.length()==0 ? '\'' : encStr.charAt(0);
+
+ CSVStrategy fstrat = new CSVStrategy(fsep,fenc,CSVStrategy.COMMENTS_DISABLED);
+ adders[i] = new CSVLoader.FieldSplitter(fstrat, adders[i]);
+ }
+ }
+ }
+
+ private void input_err(String msg, String[] line, int lineno) {
+ StringBuilder sb = new StringBuilder();
+ sb.append(errHeader+", line="+lineno + ","+msg+"\n\tvalues={");
+ for (String val: line) { sb.append("'"+val+"',"); }
+ sb.append('}');
+ throw new SolrException(400,sb.toString());
+ }
+
+ /** load the CSV input */
+ void load(Reader input) throws IOException {
+ Reader reader = input;
+ if (skipLines>0) {
+ if (!(reader instanceof BufferedReader)) {
+ reader = new BufferedReader(reader);
+ }
+ BufferedReader r = (BufferedReader)reader;
+ for (int i=0; i<skipLines; i++) {
+ r.readLine();
+ }
+ }
+
+ CSVParser parser = new CSVParser(reader, strategy);
+
+ // parse the fieldnames from the header of the file
+ if (fieldnames==null) {
+ fieldnames = parser.getLine();
+ if (fieldnames==null) {
+ throw new SolrException(400,"Expected fieldnames in CSV input");
+ }
+ prepareFields();
+ }
+
+ // read the rest of the CSV file
+ for(;;) {
+ int line = parser.getLineNumber(); // for error reporting in MT mode
+ String[] vals = parser.getLine();
+ if (vals==null) break;
+
+ if (vals.length != fields.length) {
+ input_err("expected "+fields.length+" values but got "+vals.length, vals, line);
+ }
+
+ addDoc(line,vals);
+ }
+
+ if (params.getBool(COMMIT,true)) {
+ handler.commit(new CommitUpdateCommand(false));
+ }
+ }
+
+ /** called for each line of values (document) */
+ abstract void addDoc(int line, String[] vals) throws IOException;
+
+ /** this must be MT safe... may be called concurrently from multiple threads. */
+ void doAdd(int line, String[] vals, DocumentBuilder builder, AddUpdateCommand template) throws IOException {
+ // the line number is passed simply for error reporting in MT mode.
+ // first, create the lucene document
+ builder.startDoc();
+ for (int i=0; i<vals.length; i++) {
+ if (fields[i]==null) continue; // ignore this field
+ String val = vals[i];
+ adders[i].add(builder, line, i, val);
+ }
+ builder.endDoc();
+
+ template.doc = builder.getDoc();
+ handler.addDoc(template);
+ }
+
+}
+
+
+class SingleThreadedCSVLoader extends CSVLoader {
+ protected DocumentBuilder builder;
+
+ SingleThreadedCSVLoader(SolrQueryRequest req) {
+ super(req);
+ builder = new DocumentBuilder(schema);
+ }
+
+ void addDoc(int line, String[] vals) throws IOException {
+ templateAdd.indexedId = null;
+ doAdd(line, vals, builder, templateAdd);
+ }
+}
+
Propchange: lucene/solr/trunk/src/java/org/apache/solr/handler/CSVRequestHandler.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/java/org/apache/solr/handler/CSVRequestHandler.java
------------------------------------------------------------------------------
svn:executable = *
Modified: lucene/solr/trunk/src/java/org/apache/solr/request/SolrParams.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/request/SolrParams.java?view=diff&rev=524175&r1=524174&r2=524175
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/request/SolrParams.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/request/SolrParams.java Fri Mar 30 09:59:58 2007
@@ -165,6 +165,14 @@
return val!=null ? val : get(param);
}
+ /** returns the String values of the field parameter, "f.field.param", or
+ * the values for "param" if that is not set.
+ */
+ public String[] getFieldParams(String field, String param) {
+ String[] val = getParams(fpname(field,param));
+ return val!=null ? val : getParams(param);
+ }
+
/** Returns the Boolean value of the param, or null if not set */
public Boolean getBool(String param) {
String val = get(param);
Modified: lucene/solr/trunk/src/java/org/apache/solr/util/TestHarness.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/util/TestHarness.java?view=diff&rev=524175&r1=524174&r2=524175
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/util/TestHarness.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/util/TestHarness.java Fri Mar 30 09:59:58 2007
@@ -197,19 +197,32 @@
* @see LocalSolrQueryRequest
*/
public String query(SolrQueryRequest req) throws IOException, Exception {
+ return query(req.getQueryType(), req);
+ }
+ /**
+ * Processes a "query" using a user constructed SolrQueryRequest
+ *
+ * @param handler the name of the request handler to process the request
+ * @param req the Query to process, will be closed.
+ * @return The XML response to the query
+ * @exception Exception any exception in the response.
+ * @exception IOException if there is a problem writing the XML
+ * @see LocalSolrQueryRequest
+ */
+ public String query(String handler, SolrQueryRequest req) throws IOException, Exception {
SolrQueryResponse rsp = new SolrQueryResponse();
- core.execute(req,rsp);
+ core.execute(core.getRequestHandler(handler),req,rsp);
if (rsp.getException() != null) {
throw rsp.getException();
}
-
+
StringWriter sw = new StringWriter(32000);
QueryResponseWriter responseWriter = core.getQueryResponseWriter(req);
responseWriter.write(sw,req,rsp);
req.close();
-
+
return sw.toString();
}
Added: lucene/solr/trunk/src/test/org/apache/solr/handler/TestCSVLoader.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/handler/TestCSVLoader.java?view=auto&rev=524175
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/handler/TestCSVLoader.java (added)
+++ lucene/solr/trunk/src/test/org/apache/solr/handler/TestCSVLoader.java Fri Mar 30 09:59:58 2007
@@ -0,0 +1,239 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.handler;
+
+import org.apache.solr.util.AbstractSolrTestCase;
+import org.apache.solr.util.ContentStream;
+import org.apache.solr.util.ContentStreamBase;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.core.SolrException;
+
+import java.io.*;
+import java.util.List;
+import java.util.ArrayList;
+
+public class TestCSVLoader extends AbstractSolrTestCase {
+
+ public String getSchemaFile() { return "schema.xml"; }
+ public String getSolrConfigFile() { return "solrconfig.xml"; }
+
+ String filename = "solr_tmp.csv";
+ String def_charset = "UTF-8";
+ File file = new File(filename);
+
+ public void setUp() throws Exception {
+ // if you override setUp or tearDown, you better call
+ // the super classes version
+ super.setUp();
+ }
+ public void tearDown() throws Exception {
+ // if you override setUp or tearDown, you better call
+ // the super classes version
+ super.tearDown();
+ deleteFile();
+ }
+
+ void makeFile(String contents) {
+ makeFile(contents,def_charset);
+ }
+
+ /** Write contents to the temp CSV file in the given charset; the writer is closed even if write() fails. */
+ void makeFile(String contents, String charset) {
+ try {
+ Writer out = new OutputStreamWriter(new FileOutputStream(filename), charset);
+ try { out.write(contents); } finally { out.close(); }
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ void deleteFile() {
+ file.delete();
+ }
+
+ void cleanup() {
+ assertU(delQ("id:[100 TO 110]"));
+ assertU(commit());
+ }
+
+ void loadLocal(String... args) throws Exception {
+ LocalSolrQueryRequest req = (LocalSolrQueryRequest)req(args);
+
+ // TODO: stop using locally defined streams once stream.file and
+ // stream.body work everywhere
+ List<ContentStream> cs = new ArrayList<ContentStream>();
+ cs.add(new ContentStreamBase.FileStream(new File(filename)));
+ req.setContentStreams(cs);
+ h.query("/update/csv",req);
+ }
+
+ public void testCSVLoad() throws Exception {
+ makeFile("id\n100\n101\n102");
+ loadLocal("stream.file",filename);
+ // csv loader currently defaults to committing
+ // assertU(commit());
+ assertQ(req("id:[100 TO 110]"),"//*[@numFound='3']");
+ }
+
+ public void testCommitFalse() throws Exception {
+ makeFile("id\n100\n101\n102");
+ loadLocal("stream.file",filename,"commit","false");
+ assertQ(req("id:[100 TO 110]"),"//*[@numFound='0']");
+ assertU(commit());
+ assertQ(req("id:[100 TO 110]"),"//*[@numFound='3']");
+ }
+
+ public void testCommitTrue() throws Exception {
+ makeFile("id\n100\n101\n102");
+ loadLocal("stream.file",filename,"commit","true");
+ assertQ(req("id:[100 TO 110]"),"//*[@numFound='3']");
+ }
+
+ public void testCSV() throws Exception {
+ lrf.args.put("version","2.0");
+
+ makeFile("id,str_s\n100,\"quoted\"\n101,\n102,\"\"\n103,");
+ loadLocal("stream.file",filename,"commit","true");
+ assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+ assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
+ assertQ(req("id:101"),"count(//str[@name='str_s'])=0");
+ // 102 is a quoted zero-length field ,"", as opposed to ,,
+ // but we can't distinguish the two cases (and it's debatable
+ // whether we should). Does CSV have a way to tell a missing
+ // value apart from a zero-length one?
+ assertQ(req("id:102"),"count(//str[@name='str_s'])=0");
+ assertQ(req("id:103"),"count(//str[@name='str_s'])=0");
+
+ // test overwrite by default
+ loadLocal("stream.file",filename, "commit","true");
+ assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+
+ // test no overwrites
+ loadLocal("stream.file",filename, "commit","true", "overwrite","false");
+ assertQ(req("id:[100 TO 110]"),"//*[@numFound='8']");
+
+ // test overwrite
+ loadLocal("stream.file",filename, "commit","true");
+ assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+
+ // test global value mapping
+ loadLocal("stream.file",filename, "commit","true", "map","quoted:QUOTED");
+ assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+ assertQ(req("id:100"),"//str[@name='str_s'][.='QUOTED']");
+ assertQ(req("id:101"),"count(//str[@name='str_s'])=0");
+ assertQ(req("id:102"),"count(//str[@name='str_s'])=0");
+ assertQ(req("id:103"),"count(//str[@name='str_s'])=0");
+
+ // test value mapping to empty (remove)
+ loadLocal("stream.file",filename, "commit","true", "map","quoted:");
+ assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+ assertQ(req("id:100"),"count(//str[@name='str_s'])=0");
+
+ // test value mapping from empty
+ loadLocal("stream.file",filename, "commit","true", "map",":EMPTY");
+ assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+ assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
+ assertQ(req("id:101"),"//str[@name='str_s'][.='EMPTY']");
+ assertQ(req("id:102"),"//str[@name='str_s'][.='EMPTY']");
+ assertQ(req("id:103"),"//str[@name='str_s'][.='EMPTY']");
+
+ // test multiple map rules
+ loadLocal("stream.file",filename, "commit","true", "map",":EMPTY", "map","quoted:QUOTED");
+ assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+ assertQ(req("id:100"),"//str[@name='str_s'][.='QUOTED']");
+ assertQ(req("id:101"),"//str[@name='str_s'][.='EMPTY']");
+ assertQ(req("id:102"),"//str[@name='str_s'][.='EMPTY']");
+ assertQ(req("id:103"),"//str[@name='str_s'][.='EMPTY']");
+
+ // test indexing empty fields
+ loadLocal("stream.file",filename, "commit","true", "f.str_s.keepEmpty","true");
+ assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+ assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
+ assertQ(req("id:101"),"//str[@name='str_s'][.='']");
+ assertQ(req("id:102"),"//str[@name='str_s'][.='']");
+ assertQ(req("id:103"),"//str[@name='str_s'][.='']");
+
+ // test overriding the name of fields
+ loadLocal("stream.file",filename, "commit","true",
+ "fieldnames","id,my_s", "header","true",
+ "f.my_s.map",":EMPTY");
+ assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+ assertQ(req("id:100"),"//str[@name='my_s'][.='quoted']");
+ assertQ(req("id:101"),"count(//str[@name='str_s'])=0");
+ assertQ(req("id:102"),"count(//str[@name='str_s'])=0");
+ assertQ(req("id:103"),"count(//str[@name='str_s'])=0");
+ assertQ(req("id:101"),"//str[@name='my_s'][.='EMPTY']");
+ assertQ(req("id:102"),"//str[@name='my_s'][.='EMPTY']");
+ assertQ(req("id:103"),"//str[@name='my_s'][.='EMPTY']");
+
+ // test that header in file was skipped
+ assertQ(req("id:id"),"//*[@numFound='0']");
+
+ // test loading file as if it didn't have a header
+ loadLocal("stream.file",filename, "commit","true",
+ "fieldnames","id,my_s", "header","false");
+ assertQ(req("id:id"),"//*[@numFound='1']");
+ assertQ(req("id:100"),"//str[@name='my_s'][.='quoted']");
+
+
+ // test multi-valued fields via field splitting w/ mapping of subvalues
+ makeFile("id,str_s\n"
+ +"100,\"quoted\"\n"
+ +"101,\"a,b,c\"\n"
+ +"102,\"a,,b\"\n"
+ +"103,\n");
+ loadLocal("stream.file",filename, "commit","true",
+ "f.str_s.map",":EMPTY",
+ "f.str_s.split","true");
+ assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+ assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
+ assertQ(req("id:101"),"//arr[@name='str_s']/str[1][.='a']");
+ assertQ(req("id:101"),"//arr[@name='str_s']/str[2][.='b']");
+ assertQ(req("id:101"),"//arr[@name='str_s']/str[3][.='c']");
+ assertQ(req("id:102"),"//arr[@name='str_s']/str[2][.='EMPTY']");
+ assertQ(req("id:103"),"//str[@name='str_s'][.='EMPTY']");
+
+
+ // test alternate values for delimiters
+ makeFile("id|str_s\n"
+ +"100|^quoted^\n"
+ +"101|a;'b';c\n"
+ +"102|a;;b\n"
+ +"103|\n");
+
+ loadLocal("stream.file",filename, "commit","true",
+ "separator","|",
+ "encapsulator","^",
+ "f.str_s.map",":EMPTY",
+ "f.str_s.split","true",
+ "f.str_s.separator",";",
+ "f.str_s.encapsulator","'"
+ );
+ assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+ assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
+ assertQ(req("id:101"),"//arr[@name='str_s']/str[1][.='a']");
+ assertQ(req("id:101"),"//arr[@name='str_s']/str[2][.='b']");
+ assertQ(req("id:101"),"//arr[@name='str_s']/str[3][.='c']");
+ assertQ(req("id:102"),"//arr[@name='str_s']/str[2][.='EMPTY']");
+ assertQ(req("id:103"),"//str[@name='str_s'][.='EMPTY']");
+ }
+
+
+
+}
Propchange: lucene/solr/trunk/src/test/org/apache/solr/handler/TestCSVLoader.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/test/org/apache/solr/handler/TestCSVLoader.java
------------------------------------------------------------------------------
svn:executable = *
Modified: lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml?view=diff&rev=524175&r1=524174&r2=524175
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml (original)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml Fri Mar 30 09:59:58 2007
@@ -263,6 +263,9 @@
</lst>
</requestHandler>
+ <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy">
+ </requestHandler>
+
<!-- enable streaming for testing... -->
<requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048" />
@@ -274,6 +277,5 @@
<!-- test getting system property -->
<propTest attr1="${solr.test.sys.prop1}-$${literal}"
attr2="${non.existent.sys.prop:default-from-config}">prefix-${solr.test.sys.prop2}-suffix</propTest>
-
</config>