You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2011/05/13 15:34:19 UTC
svn commit: r1102718 - /lucene/dev/trunk/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java
Author: gsingers
Date: Fri May 13 13:34:18 2011
New Revision: 1102718
URL: http://svn.apache.org/viewvc?rev=1102718&view=rev
Log:
SOLR-2511: slight refactoring to make it easier to override
Modified:
lucene/dev/trunk/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java
Modified: lucene/dev/trunk/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java?rev=1102718&r1=1102717&r2=1102718&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java (original)
+++ lucene/dev/trunk/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/SolrContentHandler.java Fri May 13 13:34:18 2011
@@ -47,23 +47,23 @@ import java.util.*;
*/
public class SolrContentHandler extends DefaultHandler implements ExtractingParams {
private transient static Logger log = LoggerFactory.getLogger(SolrContentHandler.class);
- private SolrInputDocument document;
+ protected SolrInputDocument document;
- private Collection<String> dateFormats = DateUtil.DEFAULT_DATE_FORMATS;
+ protected Collection<String> dateFormats = DateUtil.DEFAULT_DATE_FORMATS;
- private Metadata metadata;
- private SolrParams params;
- private StringBuilder catchAllBuilder = new StringBuilder(2048);
- private IndexSchema schema;
- private Map<String, StringBuilder> fieldBuilders = Collections.emptyMap();
+ protected Metadata metadata;
+ protected SolrParams params;
+ protected StringBuilder catchAllBuilder = new StringBuilder(2048);
+ protected IndexSchema schema;
+ protected Map<String, StringBuilder> fieldBuilders = Collections.emptyMap();
private LinkedList<StringBuilder> bldrStack = new LinkedList<StringBuilder>();
- private boolean captureAttribs;
- private boolean lowerNames;
- private String contentFieldName = "content";
+ protected boolean captureAttribs;
+ protected boolean lowerNames;
+ protected String contentFieldName = "content";
- private String unknownFieldPrefix = "";
- private String defaultField = "";
+ protected String unknownFieldPrefix = "";
+ protected String defaultField = "";
public SolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) {
this(metadata, params, schema, DateUtil.DEFAULT_DATE_FORMATS);
@@ -99,46 +99,82 @@ public class SolrContentHandler extends
* The base implementation adds the metadata as fields, allowing for potential remapping.
*
* @return The {@link org.apache.solr.common.SolrInputDocument}.
+ *
+ * @see #addMetadata()
+ * @see #addCapturedContent()
+ * @see #addContent()
+ * @see #addLiterals()
*/
public SolrInputDocument newDocument() {
float boost = 1.0f;
//handle the metadata extracted from the document
- for (String name : metadata.names()) {
- String[] vals = metadata.getValues(name);
- addField(name, null, vals);
- }
+ addMetadata();
//handle the literals from the params
- Iterator<String> paramNames = params.getParameterNamesIterator();
- while (paramNames.hasNext()) {
- String pname = paramNames.next();
- if (!pname.startsWith(LITERALS_PREFIX)) continue;
-
- String name = pname.substring(LITERALS_PREFIX.length());
- addField(name, null, params.getParams(pname));
- }
+ addLiterals();
//add in the content
- addField(contentFieldName, catchAllBuilder.toString(), null);
+ addContent();
//add in the captured content
+ addCapturedContent();
+
+ if (log.isDebugEnabled()) {
+ log.debug("Doc: {}", document);
+ }
+ return document;
+ }
+
+ /**
+ * Add the per field captured content to the Solr Document. Default implementation uses the
+ * {@link #fieldBuilders} info
+ */
+ protected void addCapturedContent() {
for (Map.Entry<String, StringBuilder> entry : fieldBuilders.entrySet()) {
if (entry.getValue().length() > 0) {
addField(entry.getKey(), entry.getValue().toString(), null);
}
}
- if (log.isDebugEnabled()) {
- log.debug("Doc: " + document);
+ }
+
+ /**
+ * Add in the catch all content to the field. Default impl. uses the {@link #contentFieldName}
+ * and the {@link #catchAllBuilder}
+ */
+ protected void addContent() {
+ addField(contentFieldName, catchAllBuilder.toString(), null);
+ }
+
+ /**
+ * Add in the literals to the document using the {@link #params} and the {@link #LITERALS_PREFIX}.
+ */
+ protected void addLiterals() {
+ Iterator<String> paramNames = params.getParameterNamesIterator();
+ while (paramNames.hasNext()) {
+ String pname = paramNames.next();
+ if (!pname.startsWith(LITERALS_PREFIX)) continue;
+
+ String name = pname.substring(LITERALS_PREFIX.length());
+ addField(name, null, params.getParams(pname));
+ }
+ }
+
+ /**
+ * Add in any metadata using {@link #metadata} as the source.
+ */
+ protected void addMetadata() {
+ for (String name : metadata.names()) {
+ String[] vals = metadata.getValues(name);
+ addField(name, null, vals);
}
- return document;
}
// Naming rules:
// 1) optionally map names to nicenames (lowercase+underscores)
// 2) execute "map" commands
// 3) if resulting field is unknown, map it to a common prefix
- private void addField(String fname, String fval, String[] vals) {
+ protected void addField(String fname, String fval, String[] vals) {
if (lowerNames) {
StringBuilder sb = new StringBuilder();
for (int i=0; i<fname.length(); i++) {