You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by sh...@apache.org on 2009/01/28 09:30:03 UTC

svn commit: r738401 - in /lucene/solr/trunk/contrib/dataimporthandler: CHANGES.txt src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java

Author: shalin
Date: Wed Jan 28 08:30:02 2009
New Revision: 738401

URL: http://svn.apache.org/viewvc?rev=738401&view=rev
Log:
SOLR-980 -- A PlainTextEntityProcessor which can read from any DataSource<Reader> and output a String

Added:
    lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java   (with props)
    lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java   (with props)
Modified:
    lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt

Modified: lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt?rev=738401&r1=738400&r2=738401&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt Wed Jan 28 08:30:02 2009
@@ -56,6 +56,9 @@
 12.SOLR-988:  Add a new scope for session data stored in Context to store objects across imports.
               (Noble Paul via shalin)
 
+13.SOLR-980:  A PlainTextEntityProcessor which can read from any DataSource<Reader> and output a String.
+              (Nathan Adams, Noble Paul via shalin)
+
 Optimizations
 ----------------------
 1. SOLR-846:  Reduce memory consumption during delta import by removing keys when used

Added: lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java?rev=738401&view=auto
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java (added)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java Wed Jan 28 08:30:02 2009
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler.dataimport;
+
+import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
+import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
+import static org.apache.solr.handler.dataimport.XPathEntityProcessor.URL;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringWriter;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * <p>An implementation of EntityProcessor which reads data from a url/file and gives out a single row
+ * which contains one String value. The name of the field is 'plainText'.</p>
+ *
+ * @version $Id$
+ * @since solr 1.4
+ */
+public class PlainTextEntityProcessor extends EntityProcessorBase {
+  private static final Logger LOG = LoggerFactory.getLogger(PlainTextEntityProcessor.class);
+  public static final String PLAIN_TEXT = "plainText";
+  private boolean ended = false; // true once the single row has been returned; reset by init()
+
+  public void init(Context context) {
+    super.init(context);
+    ended = false;
+  }
+
+  /** Returns the single row once; null afterwards, or when reading fails and onError is not ABORT. */
+  public Map<String, Object> nextRow() {
+    if (ended) return null;
+    DataSource<Reader> ds = context.getDataSource();
+    String url = context.getVariableResolver().replaceTokens(context.getEntityAttribute(URL));
+    Reader r = null;
+    try {
+      r = ds.getData(url);
+    } catch (Exception e) {
+      if (ABORT.equals(onError)) wrapAndThrow(SEVERE, e, "Exception reading url : " + url);
+      return null;
+    }
+    StringWriter sw = new StringWriter();
+    char[] buf = new char[1024];
+    try {
+      int len;
+      while ((len = r.read(buf)) > 0) sw.write(buf, 0, len); // write chars directly, no temp String
+    } catch (IOException e) {
+      if (ABORT.equals(onError)) {
+        wrapAndThrow(SEVERE, e, "Exception reading url : " + url);
+      } else {
+        LOG.warn("IOException while reading from data source", e);
+        return null;
+      }
+    } finally {
+      try {
+        r.close(); // release the reader on success and on failure alike
+      } catch (IOException e) {
+        LOG.warn("Could not close reader for : " + url, e);
+      }
+    }
+    Map<String, Object> row = new HashMap<String, Object>();
+    row.put(PLAIN_TEXT, sw.toString());
+    ended = true;
+    return super.applyTransformer(row);
+  }
+}

Propchange: lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java?rev=738401&view=auto
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java (added)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java Wed Jan 28 08:30:02 2009
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler.dataimport;
+
+import junit.framework.Assert;
+import static org.apache.solr.handler.dataimport.AbstractDataImportHandlerTest.createMap;
+import org.junit.Test;
+
+import java.io.StringReader;
+import java.util.Properties;
+
+/**
+ * Test for PlainTextEntityProcessor
+ *
+ * @version $Id$
+ * @see org.apache.solr.handler.dataimport.PlainTextEntityProcessor
+ * @since solr 1.4
+ */
+public class TestPlainTextEntityProcessor {
+  static String DATA_CONFIG = "<dataConfig>\n" +
+          "\t<dataSource type=\"TestPlainTextEntityProcessor$DS\" />\n" +
+          "\t<document>\n" +
+          "\t\t<entity processor=\"PlainTextEntityProcessor\" name=\"x\" query=\"x\">\n" +
+          "\t\t\t<field column=\"plainText\" name=\"x\" />\n" +
+          "\t\t</entity>\n" +
+          "\t</document>\n" +
+          "</dataConfig>";
+
+  /** Runs a full-import over DATA_CONFIG and checks that field "x" of the first document equals DS.s. */
+  @Test
+  public void simple() {
+    DataImporter importer = new DataImporter();
+    importer.loadAndInit(DATA_CONFIG);
+    TestDocBuilder.SolrWriterImpl writer = new TestDocBuilder.SolrWriterImpl();
+    importer.runCmd(new DataImporter.RequestParams(createMap("command", "full-import")), writer);
+    Assert.assertEquals(DS.s, writer.docs.get(0).getFieldValue("x"));
+  }
+
+  /** DataSource stub whose getData always hands back a StringReader over the static text s. */
+  public static class DS extends DataSource {
+    static String s = "hello world";
+
+    public void init(Context context, Properties initProps) {
+      // nothing to set up
+    }
+
+    public Object getData(String query) {
+      // the query is ignored; every call yields a fresh reader over s
+      return new StringReader(s);
+    }
+
+    public void close() {
+      // nothing to release
+    }
+  }
+}

Propchange: lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL