You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2013/07/05 17:30:43 UTC

svn commit: r1500046 - in /lucene/dev/trunk/solr: CHANGES.txt core/src/java/org/apache/solr/handler/loader/CSVLoaderBase.java core/src/test/org/apache/solr/handler/TestCSVLoader.java

Author: gsingers
Date: Fri Jul  5 15:30:42 2013
New Revision: 1500046

URL: http://svn.apache.org/r1500046
Log:
SOLR-5003: add rowid (line number) option to CSV Loader

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/loader/CSVLoaderBase.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/TestCSVLoader.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1500046&r1=1500045&r2=1500046&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Fri Jul  5 15:30:42 2013
@@ -156,6 +156,9 @@ New Features
   which are INACTIVE or have no range (created for custom sharding).
   (Anshum Gupta, shalin)
 
+* SOLR-5003: CSV Update Handler supports optionally adding the line number/row id to 
+  a document (gsingers)
+
 Bug Fixes
 ----------------------
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/loader/CSVLoaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/loader/CSVLoaderBase.java?rev=1500046&r1=1500045&r2=1500046&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/loader/CSVLoaderBase.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/loader/CSVLoaderBase.java Fri Jul  5 15:30:42 2013
@@ -55,6 +55,7 @@ abstract class CSVLoaderBase extends Con
   public static final String ESCAPE="escape";
   public static final String OVERWRITE="overwrite";
   public static final String LITERALS_PREFIX = "literal.";
+  public static final String ROW_ID = "rowid";
 
   private static Pattern colonSplit = Pattern.compile(":");
   private static Pattern commaSplit = Pattern.compile(",");
@@ -65,13 +66,15 @@ abstract class CSVLoaderBase extends Con
   final SolrParams params;
   final CSVStrategy strategy;
   final UpdateRequestProcessor processor;
-
   // hashmap to save any literal fields and their values
   HashMap <SchemaField, String> literals;
+
   String[] fieldnames;
   SchemaField[] fields;
   CSVLoaderBase.FieldAdder[] adders;
 
+  String rowId = null;// if not null, add a special field by the name given with the line number/row id as the value
+
   int skipLines;    // number of lines to skip at start of file
 
   final AddUpdateCommand templateAdd;
@@ -186,6 +189,7 @@ abstract class CSVLoaderBase extends Con
     if (escape!=null) {
       if (escape.length()!=1) throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Invalid escape:'"+escape+"'");
     }
+    rowId = params.get(ROW_ID);
 
     // if only encapsulator or escape is set, disable the other escaping mechanism
     if (encapsulator == null && escape != null) {
@@ -290,6 +294,7 @@ abstract class CSVLoaderBase extends Con
       if (!pname.startsWith(LITERALS_PREFIX)) continue;
 
       String name = pname.substring(LITERALS_PREFIX.length());
+      //TODO: need to look at this in light of schemaless
       SchemaField sf = schema.getFieldOrNull(name);
       if(sf == null)
         throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Invalid field name for literal:'"+ name +"'");
@@ -378,7 +383,7 @@ abstract class CSVLoaderBase extends Con
 
   /** this must be MT safe... may be called concurrently from multiple threads. */
   void doAdd(int line, String[] vals, SolrInputDocument doc, AddUpdateCommand template) throws IOException {
-    // the line number is passed simply for error reporting in MT mode.
+    // the line number is passed for error reporting in MT mode as well as for optional rowId.
     // first, create the lucene document
     for (int i=0; i<vals.length; i++) {
       if (fields[i]==null) continue;  // ignore this field
@@ -392,7 +397,9 @@ abstract class CSVLoaderBase extends Con
       String val = literals.get(sf);
       doc.addField(fn, val);
     }
-   
+    if (rowId != null){
+      doc.addField(rowId, line);
+    }
     template.solrDoc = doc;
     processor.processAdd(template);
   }

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/TestCSVLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/TestCSVLoader.java?rev=1500046&r1=1500045&r2=1500046&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/TestCSVLoader.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/TestCSVLoader.java Fri Jul  5 15:30:42 2013
@@ -108,6 +108,17 @@ public class TestCSVLoader extends SolrT
   }
 
   @Test
+  public void testCSVRowId() throws Exception {
+    makeFile("id\n100\n101\n102");
+    loadLocal("rowid", "rowid_i");// rowid=rowid_i: ask the loader to add the line number/row id to each document as field rowid_i
+    // loadLocal is assumed not to commit by default (per the original note here), so commit before querying
+    assertU(commit());
+    assertQ(req("rowid_i:1"),"//*[@numFound='1']");
+    assertQ(req("rowid_i:2"),"//*[@numFound='1']");
+    assertQ(req("rowid_i:100"),"//*[@numFound='0']");// 100 occurs only as an id value in this four-line file, never as a row id
+  }
+
+  @Test
   public void testCommitFalse() throws Exception {
     makeFile("id\n100\n101\n102");
     loadLocal("commit","false");