You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by sh...@apache.org on 2009/01/28 09:30:03 UTC
svn commit: r738401 - in /lucene/solr/trunk/contrib/dataimporthandler:
CHANGES.txt
src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java
src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java
Author: shalin
Date: Wed Jan 28 08:30:02 2009
New Revision: 738401
URL: http://svn.apache.org/viewvc?rev=738401&view=rev
Log:
SOLR-980 -- A PlainTextEntityProcessor which can read from any DataSource<Reader> and output a String
Added:
lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java (with props)
lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java (with props)
Modified:
lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt
Modified: lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt?rev=738401&r1=738400&r2=738401&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt Wed Jan 28 08:30:02 2009
@@ -56,6 +56,9 @@
12.SOLR-988: Add a new scope for session data stored in Context to store objects across imports.
(Noble Paul via shalin)
+13.SOLR-980: A PlainTextEntityProcessor which can read from any DataSource<Reader> and output a String.
+ (Nathan Adams, Noble Paul via shalin)
+
Optimizations
----------------------
1. SOLR-846: Reduce memory consumption during delta import by removing keys when used
Added: lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java?rev=738401&view=auto
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java (added)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java Wed Jan 28 08:30:02 2009
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler.dataimport;
+
+import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
+import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
+import static org.apache.solr.handler.dataimport.XPathEntityProcessor.URL;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringWriter;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * <p>An implementation of EntityProcessor which reads data from a url/file and gives out a single row which contains
+ * one String value. The name of the field is 'plainText'.</p>
+ *
+ * @version $Id$
+ * @since solr 1.4
+ */
+public class PlainTextEntityProcessor extends EntityProcessorBase {
+  private static final Logger LOG = LoggerFactory.getLogger(PlainTextEntityProcessor.class);
+  public static final String PLAIN_TEXT = "plainText"; // key of the only entry in the returned row
+  private boolean ended = false; // true once the single row has been returned
+
+  public void init(Context context) {
+    super.init(context);
+    ended = false; // a fresh init makes the row available again
+  }
+
+  /**
+   * Reads everything from the Reader the DataSource returns for the resolved url attribute, then closes the Reader.
+   * A failure is passed to wrapAndThrow when onError equals ABORT; otherwise null is returned.
+   * @return a row holding the text under 'plainText', or null if it was already returned or reading failed
+   */
+  public Map<String, Object> nextRow() {
+    if (ended) return null;
+    DataSource<Reader> ds = context.getDataSource();
+    String url = context.getVariableResolver().replaceTokens(context.getEntityAttribute(URL));
+    Reader r = null;
+    try {
+      r = ds.getData(url);
+    } catch (Exception e) {
+      if (ABORT.equals(onError)) wrapAndThrow(SEVERE, e, "Exception reading url : " + url);
+      return null;
+    }
+    StringWriter sw = new StringWriter();
+    char[] buf = new char[1024];
+    try {
+      for (int len = r.read(buf); len > 0; len = r.read(buf)) sw.write(buf, 0, len);
+    } catch (IOException e) {
+      if (ABORT.equals(onError)) wrapAndThrow(SEVERE, e, "Exception reading url : " + url);
+      LOG.warn("IOException while reading from data source", e);
+      return null;
+    } finally {
+      try {
+        r.close(); // always release the Reader, whether reading succeeded or failed
+      } catch (IOException e) {
+        LOG.warn("IOException while closing reader", e);
+      }
+    }
+    Map<String, Object> row = new HashMap<String, Object>();
+    row.put(PLAIN_TEXT, sw.toString());
+    ended = true;
+    return super.applyTransformer(row);
+  }
+}
Propchange: lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Added: lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java?rev=738401&view=auto
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java (added)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java Wed Jan 28 08:30:02 2009
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler.dataimport;
+
+import junit.framework.Assert;
+import static org.apache.solr.handler.dataimport.AbstractDataImportHandlerTest.createMap;
+import org.junit.Test;
+
+import java.io.StringReader;
+import java.util.Properties;
+
+/**
+ * Test for PlainTextEntityProcessor
+ *
+ * @version $Id$
+ * @see org.apache.solr.handler.dataimport.PlainTextEntityProcessor
+ * @since solr 1.4
+ */
+public class TestPlainTextEntityProcessor {
+  // Import configuration: one entity read through the stub DS, its 'plainText' column mapped to field 'x'.
+  static String DATA_CONFIG = "<dataConfig>\n" +
+          "\t<dataSource type=\"TestPlainTextEntityProcessor$DS\" />\n" +
+          "\t<document>\n" +
+          "\t\t<entity processor=\"PlainTextEntityProcessor\" name=\"x\" query=\"x\">\n" +
+          "\t\t\t<field column=\"plainText\" name=\"x\" />\n" +
+          "\t\t</entity>\n" +
+          "\t</document>\n" +
+          "</dataConfig>";
+
+  /** Runs a full-import with DATA_CONFIG and checks that field 'x' of the first document equals DS.s. */
+  @Test
+  public void simple() {
+    DataImporter importer = new DataImporter();
+    importer.loadAndInit(DATA_CONFIG);
+    TestDocBuilder.SolrWriterImpl writer = new TestDocBuilder.SolrWriterImpl();
+    DataImporter.RequestParams params = new DataImporter.RequestParams(createMap("command", "full-import"));
+    importer.runCmd(params, writer);
+    Assert.assertEquals(DS.s, writer.docs.get(0).getFieldValue("x"));
+  }
+
+  /** Stub DataSource that ignores the query it is given and always serves the text held in s. */
+  public static class DS extends DataSource {
+    static String s = "hello world";
+    public void init(Context context, Properties initProps) {
+      // nothing to set up
+    }
+
+    public Object getData(String query) {
+      return new StringReader(s);
+    }
+
+    public void close() {
+      // nothing to release
+    }
+  }
+}
Propchange: lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL