You are viewing a plain text version of this content. The canonical link for it is here.
Posted to slide-dev@jakarta.apache.org by lu...@apache.org on 2004/10/22 17:14:46 UTC
cvs commit: jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions PropertyContainsExpression.java BetweenExpression.java EqExpression.java GtExpression.java IsDefinedExpression.java LikeExpression.java LtExpression.java PropcontainsExpression.java
luetzkendorf 2004/10/22 08:14:46
Modified: src/stores/org/apache/slide/index/lucene Index.java
IndexConfiguration.java
LuceneExpressionFactory.java
LucenePropertiesIndexer.java
src/stores/org/apache/slide/index/lucene/expressions
BetweenExpression.java EqExpression.java
GtExpression.java IsDefinedExpression.java
LikeExpression.java LtExpression.java
PropcontainsExpression.java
Added: src/stores/org/apache/slide/index/lucene properties.xml
src/stores/org/apache/slide/index/lucene/expressions
PropertyContainsExpression.java
Removed: src/stores/org/apache/slide/index/lucene IndexException.java
Log:
continued (now configurable, text properties supported, ...)
Revision Changes Path
1.2 +189 -28 jakarta-slide/src/stores/org/apache/slide/index/lucene/Index.java
Index: Index.java
===================================================================
RCS file: /home/cvs/jakarta-slide/src/stores/org/apache/slide/index/lucene/Index.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- Index.java 18 Oct 2004 09:27:36 -0000 1.1
+++ Index.java 22 Oct 2004 15:14:45 -0000 1.2
@@ -27,13 +27,14 @@
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
+import java.util.Enumeration;
import java.util.Iterator;
import java.util.Locale;
-import java.util.Map;
import java.util.Set;
+import java.util.StringTokenizer;
-import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
@@ -42,6 +43,11 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
+import org.apache.slide.common.Uri;
+import org.apache.slide.content.NodeProperty;
+import org.apache.slide.content.NodeRevisionDescriptor;
+import org.apache.slide.content.NodeRevisionNumber;
+import org.apache.slide.search.IndexException;
import org.apache.slide.util.logger.Logger;
/**
@@ -65,20 +71,24 @@
public static final String INT_LOWER_BOUND = INT_INDEX_FORMAT.format(Long.MIN_VALUE);
public static final String INT_UPPER_BOUND = INT_INDEX_FORMAT.format(Long.MAX_VALUE);
+ private static final String LOG_CHANNEL = Index.class.getName();
- private Analyzer analyzer;
-
- private IndexConfiguration configuration;
+ protected IndexConfiguration configuration;
private String directoryName;
private Logger logger;
+ /**
+ * Counter for recently executed index jobs (insertions, deletions).
+ * Will be reset after optimization.
+ */
+ private int jobCounter = 0;
+
- public Index(String directoryName, Analyzer analyzer, Logger logger) throws IndexException
+ public Index(String directoryName, Logger logger) throws IndexException
{
this.directoryName = directoryName;
- this.analyzer = analyzer;
this.logger = logger;
this.configuration = new IndexConfiguration();
@@ -87,7 +97,7 @@
File file = new File(this.directoryName);
if (!file.exists() && !file.mkdirs()) {
throw new IndexException(
- "Error can't get or create index directory: ",
+ "Error can't find or create index directory: " +
this.directoryName);
}
@@ -99,11 +109,11 @@
}
} else {
IndexWriter writer = new IndexWriter(directory,
- this.analyzer, true);
+ configuration.getAnalyzer(), true);
writer.close();
}
} catch (IOException e) {
- throw new IndexException("Error while creating index: ",
+ throw new IndexException("Error while creating index: " +
this.directoryName, e);
}
@@ -132,27 +142,178 @@
return FSDirectory.getDirectory(this.directoryName, false);
}
- synchronized void addIndexJob(Map toAdd, Set toRemove) throws IOException {
- // TODO make async by option
- if (toRemove.size() > 0) {
- IndexReader reader = IndexReader.open(getDirectory());
- for(Iterator i = toRemove.iterator(); i.hasNext();) {
- String key = (String)i.next();
- reader.delete(new Term(Index.KEY_FIELD_NAME, key));
- }
- reader.close();
+ private Field unstoredString(String fieldName, String value) {
+ return new Field(fieldName, value, false, true, false);
+ }
+ private Field storedString(String fieldName, String value) {
+ return new Field(fieldName, value, true, true, false);
+ }
+ private Field textField(String fieldName, String value) {
+ return new Field(fieldName, value, false, true, true);
+ }
+ private Document createLuceneDocument(String uri, NodeRevisionDescriptor descriptor) {
+
+ Document doc = new Document();
+
+ doc.add(unstoredString(Index.KEY_FIELD_NAME,
+ configuration.generateKey(uri, descriptor.getRevisionNumber())));
+ doc.add(storedString(Index.URI_FIELD_NAME, uri.toString()));
+
+ // scopes
+ StringTokenizer tokenizer = new StringTokenizer(uri, "/");
+ StringBuffer buffer = new StringBuffer(uri.length());
+ doc.add(unstoredString(Index.SCOPE_FIELD_NAME, "/"));
+ for(; tokenizer.hasMoreTokens();) {
+ buffer.append("/").append(tokenizer.nextToken());
+ doc.add(unstoredString(Index.SCOPE_FIELD_NAME, buffer.toString()));
}
-
- if (toAdd.size() > 0) {
- IndexWriter writer = new IndexWriter(getDirectory(),
- this.analyzer, false);
-
- for(Iterator i = toAdd.values().iterator(); i.hasNext(); ) {
- Document doc = (Document)i.next();
- writer.addDocument(doc);
+
+ //doc.add(unstoredString(Index.VERSION_FIELD_NAME,
+ // descriptor.getRevisionNumber().toString()));
+
+ // resource type
+ String rtype = descriptor.getResourceType();
+ if (rtype.indexOf("collection") != -1) {
+ doc.add(unstoredString(configuration.generateFieldName(
+ NodeProperty.DEFAULT_NAMESPACE, "resourcetype"),
+ "collection"));
+ if (rtype.indexOf("principal") != -1) {
+ doc.add(unstoredString(configuration.generateFieldName(
+ NodeProperty.DEFAULT_NAMESPACE, "resourcetype"),
+ "principal"));
}
+ }
+
+ for(Enumeration e = descriptor.enumerateProperties(); e.hasMoreElements();) {
+ NodeProperty property = (NodeProperty)e.nextElement();
+ String name = property.getName();
+ String namespace = property.getNamespace();
+ Object value = property.getValue();
- writer.close();
+ if (value == null) continue;
+ if (!configuration.isIndexedProperty(namespace, name)) continue;
+
+ if (configuration.isStringProperty(namespace, name)) {
+ doc.add(unstoredString(configuration.generateFieldName(
+ property.getNamespace(), property.getName()),
+ value.toString()));
+ }
+ if (configuration.isDateProperty(namespace, name)) {
+ Date date = configuration.getDateValue(value);
+ if (date != null) {
+ doc.add(unstoredString(configuration.generateFieldName(
+ property.getNamespace(), property.getName()),
+ configuration.dateToIndexString(date)));
+ }
+ }
+ if (configuration.isIntProperty(namespace, name)) {
+ try {
+ doc.add(unstoredString(configuration.generateFieldName(
+ property.getNamespace(), property.getName()),
+ configuration.intToIndexString(
+ Long.parseLong(value.toString()))));
+ } catch (NumberFormatException ex) {
+ // TODO log warning
+ }
+ }
+ if (configuration.isTextProperty(namespace, name)) {
+ doc.add(textField(configuration.generateFieldName(
+ property.getNamespace(), property.getName()),
+ value.toString()));
+ }
+ if (configuration.supportsIsDefined(namespace, name)) {
+ doc.add(unstoredString(Index.IS_DEFINED_FIELD_NAME,
+ configuration.generateFieldName(
+ property.getNamespace(), property.getName())));
+ }
+ }
+
+ return doc;
+ }
+
+
+ synchronized void executeIndexTransaction(Set removeJobs, Set addJobs)
+ throws IndexException {
+ // TODO make async by option
+ try {
+ if (removeJobs.size() > 0) {
+ IndexReader reader = IndexReader.open(getDirectory());
+ for(Iterator i = removeJobs.iterator(); i.hasNext();) {
+ IndexJob job = (IndexJob)i.next();
+ logger.log("remove: " + job.key, LOG_CHANNEL, Logger.INFO);
+ reader.delete(new Term(Index.KEY_FIELD_NAME, job.getKey()));
+ this.jobCounter++;
+ }
+ reader.close();
+ }
+
+ if (addJobs.size() > 0) {
+ IndexWriter writer = new IndexWriter(getDirectory(),
+ configuration.getAnalyzer(), false);
+
+ for(Iterator i = addJobs.iterator(); i.hasNext(); ) {
+ IndexJob job = (IndexJob)i.next();
+ logger.log("index: " + job.key, LOG_CHANNEL, Logger.INFO);
+ Document doc = createLuceneDocument(job.uri, job.descriptor);
+ writer.addDocument(doc);
+ this.jobCounter++;
+ }
+ if (this.jobCounter > configuration.getOptimizeThreshold()) {
+ writer.optimize();
+ logger.log("optimize", LOG_CHANNEL, Logger.INFO);
+ this.jobCounter = 0;
+ }
+ writer.close();
+ }
+ } catch (Exception e) {
+ throw new IndexException(e);
+ }
+ }
+
+
+ public IndexJob createIndexJob(Uri uri,
+ NodeRevisionNumber number, NodeRevisionDescriptor descriptor) {
+ return new IndexJob(uri, number, descriptor);
+ }
+ public IndexJob createDeleteJob(Uri uri, NodeRevisionNumber number) {
+ return new IndexJob(uri, number);
+ }
+
+ public class IndexJob {
+ protected String key;
+ protected String uri;
+ protected String number;
+ protected NodeRevisionDescriptor descriptor;
+
+ protected String getKey() {
+ return key;
+ }
+ protected IndexJob(Uri uri, NodeRevisionNumber number) {
+ this.uri = uri.toString();
+ this.number = number.toString();
+ this.descriptor = null;
+ this.key = configuration.generateKey(this.uri, number);
+ }
+ protected IndexJob(Uri uri, NodeRevisionNumber number, NodeRevisionDescriptor descriptor) {
+ this.uri = uri.toString();
+ this.number = number.toString();
+ this.descriptor = descriptor;
+ this.key = configuration.generateKey(this.uri, number);
+ }
+ public boolean equals(Object obj)
+ {
+ if (this == obj) return true;
+ if (obj instanceof IndexJob) {
+ return this.key.equals(((IndexJob)obj).key);
+ }
+ return false;
+ }
+ public int hashCode()
+ {
+ return key.hashCode();
+ }
+ public String toString() {
+ return this.key;
}
}
}
1.2 +132 -71 jakarta-slide/src/stores/org/apache/slide/index/lucene/IndexConfiguration.java
Index: IndexConfiguration.java
===================================================================
RCS file: /home/cvs/jakarta-slide/src/stores/org/apache/slide/index/lucene/IndexConfiguration.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- IndexConfiguration.java 18 Oct 2004 09:27:36 -0000 1.1
+++ IndexConfiguration.java 22 Oct 2004 15:14:45 -0000 1.2
@@ -22,53 +22,53 @@
*/
package org.apache.slide.index.lucene;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.analysis.TokenStream;
-import org.apache.slide.common.Uri;
-import org.apache.slide.content.NodeProperty;
import org.apache.slide.content.NodeRevisionNumber;
+import org.apache.slide.search.IndexException;
+import org.jdom.Document;
import org.jdom.Element;
+import org.jdom.JDOMException;
+import org.jdom.input.SAXBuilder;
/**
+ * Holds all configuration information about indexing.
*/
public class IndexConfiguration
{
- private Set keywordProperties = new HashSet();
- private Set textProperties = new HashSet();
- private Set dateProperties = new HashSet();
- private Set intProperties = new HashSet();
- private Set supportsIsdefinedProperties = new HashSet();
- private Set indexedProperties = new HashSet();
- private Map analyzers = new HashMap();
+ protected Set stringProperties = new HashSet();
+ protected Map textProperties = new HashMap();
+ protected Set dateProperties = new HashSet();
+ protected Set intProperties = new HashSet();
+ protected Set supportsIsdefinedProperties = new HashSet();
+ protected Set indexedProperties = new HashSet();
+ protected int optimizeThreshold = 100;
+ protected Analyzer analyzer = new IndexAnalyzer();
- public void addKeywordProperty(String namespace, String name) {
+ public void addStringProperty(String namespace, String name) {
String key = namespace + name;
- this.keywordProperties.add(key);
+ this.stringProperties.add(key);
this.indexedProperties.add(key);
}
- public boolean isKeywordProperty(NodeProperty property) {
- return this.keywordProperties.contains(
- property.getNamespace() + property.getName());
- }
-
- public void addTextProperty(String namespace, String name) {
- String key = namespace + name;
- this.textProperties.add(key);
- this.indexedProperties.add(key);
- }
- public boolean isTextProperty(NodeProperty property) {
- return this.textProperties.contains(
- property.getNamespace() + property.getName());
+ public boolean isStringProperty(String namespace, String name) {
+ return this.stringProperties.contains(namespace + name);
}
public void addDateProperty(String namespace, String name) {
@@ -79,12 +79,6 @@
public boolean isDateProperty(String namespace, String name) {
return this.dateProperties.contains(namespace + name);
}
- public boolean isDateProperty(NodeProperty property) {
- return isDateProperty(property.getNamespace(), property.getName());
- }
- public boolean isDateProperty(Element element) {
- return isDateProperty(element.getNamespaceURI(), element.getName());
- }
public void addIntProperty(String namespace, String name) {
@@ -95,12 +89,6 @@
public boolean isIntProperty(String namespace, String name) {
return this.intProperties.contains(namespace + name);
}
- public boolean isIntProperty(NodeProperty property) {
- return isIntProperty(property.getNamespace(), property.getName());
- }
- public boolean isIntProperty(Element element) {
- return isIntProperty(element.getNamespaceURI(), element.getName());
- }
public void addSupportsIsdefinedProperty(String namespace, String name) {
String key = namespace + name;
@@ -111,41 +99,104 @@
public boolean supportsIsDefined(String namespace, String name) {
return this.supportsIsdefinedProperties.contains(namespace + name);
}
- public boolean supportsIsDefined(NodeProperty property) {
- return supportsIsDefined(property.getNamespace(), property.getName());
+
+ public void addTextProperty(String namespace, String name, Analyzer analyzer) {
+ String key = namespace + name;
+ this.textProperties.put(key, analyzer);
+ this.indexedProperties.add(key);
}
- public boolean supportsIsDefined(Element element) {
- return supportsIsDefined(element.getNamespaceURI(), element.getName());
+
+ public boolean isTextProperty(String namespace, String name) {
+ return this.textProperties.containsKey(namespace + name);
}
+
public boolean isIndexedProperty(String namespace, String name) {
return this.indexedProperties.contains(namespace + name);
}
- public boolean isIndexedProperty(NodeProperty property) {
- return isIndexedProperty(property.getNamespace(), property.getName());
+
+
+ public int getOptimizeThreshold() {
+ return this.optimizeThreshold;
+ }
+ public void setOptimizeThreshold(int value) {
+ this.optimizeThreshold = value;
}
- public void addAnalyzer(String namespace, String name, Analyzer analyzer) {
- this.analyzers.put(namespace + name, analyzer);
+ public Analyzer getAnalyzer() {
+ return this.analyzer;
}
- void initDefaultConfiguration() {
-
- addKeywordProperty("DAV:", "displayname");
- addKeywordProperty("DAV:", "getcontenttype");
- addSupportsIsdefinedProperty("DAV:", "getcontenttype");
- addKeywordProperty("DAV:", "getcontentlanguage");
-
- addKeywordProperty("DAV:", "owner");
- addSupportsIsdefinedProperty("DAV:", "owner");
- addKeywordProperty("DAV:", "modificationuser");
- addSupportsIsdefinedProperty("DAV:", "modificationuser");
-
- addIntProperty("DAV:", "getcontentlength");
-
- addDateProperty("DAV:", "getlastmodified");
- addDateProperty("DAV:", "creationdate");
- addDateProperty("DAV:", "modificationdate");
+ void initDefaultConfiguration() throws IndexException {
+ loadConfigurationFromResource(
+ "org/apache/slide/index/lucene/properties.xml");
+ }
+
+ void loadConfigurationFromResource(String resourceName) throws IndexException {
+ InputStream is = this.getClass().getClassLoader().getResourceAsStream(
+ resourceName);
+ if (is != null) {
+ importConfiguration(is);
+ } else {
+ throw new IndexException("Can't find index configuration at: " + resourceName);
+ }
+ }
+
+ private void importConfiguration(InputStream is) throws IndexException {
+ try {
+ Document doc = new SAXBuilder().build(is);
+ List properties = doc.getRootElement().getChildren();
+
+ for(Iterator i = properties.iterator(); i.hasNext();) {
+ Element e = (Element)i.next();
+ String n = e.getName(); String ns = e.getNamespaceURI();
+
+ Element child;
+
+ child = e.getChild("string");
+ if (child != null) {
+ addStringProperty(ns, n);
+ }
+ child = e.getChild("integer");
+ if (child != null) {
+ addIntProperty(ns, n);
+ }
+ child = e.getChild("date");
+ if (child != null) {
+ addDateProperty(ns, n);
+ }
+ child = e.getChild("text");
+ if (child != null) {
+ String clsName = child.getAttributeValue("analyzer");
+ if (clsName == null) {
+ clsName = "org.apache.lucene.analysis.SimpleAnalyzer";
+ }
+ Analyzer analyzer;
+ try {
+ Class cls = Class.forName(clsName);
+ analyzer = (Analyzer)cls.newInstance();
+ } catch (ClassNotFoundException ex) {
+ throw new IndexException("Analyzer class not found (" + ns + ", " + n + ")", ex);
+ } catch (InstantiationException ex) {
+ throw new IndexException("Can't instanciate analyzer (" + ns + ", " + n + ")", ex);
+ } catch (IllegalAccessException ex) {
+ throw new IndexException("Can't instanciate analyzer (" + ns + ", " + n + ")", ex);
+ } catch (ClassCastException ex) {
+ throw new IndexException("Analyzer does not extend Analyzer (" + ns + ", " + n + ")", ex);
+ }
+ addTextProperty(ns, n, analyzer);
+ }
+ child = e.getChild("is-defined");
+ if (child != null) {
+ addSupportsIsdefinedProperty(ns, n);
+ }
+ }
+
+ } catch (JDOMException e) {
+ throw new IndexException(e);
+ } catch (IOException e) {
+ throw new IndexException(e);
+ }
}
// ------ data type helper -------------------------------------------------
@@ -156,14 +207,8 @@
public String generateFieldName(String namespaceUri, String name) {
return namespaceUri + name;
}
- public String generateFieldName(NodeProperty property) {
- return generateFieldName(property.getNamespace(), property.getName());
- }
- public String generateFieldName(Element element) {
- return generateFieldName(element.getNamespaceURI(), element.getName());
- }
- public String generateKey(Uri uri, NodeRevisionNumber number) {
+ public String generateKey(String uri, NodeRevisionNumber number) {
return uri.toString() + "#" + number;
}
@@ -211,6 +256,22 @@
}
}
return null;
+ }
+ }
+
+
+ class IndexAnalyzer extends Analyzer {
+ Analyzer defaultAnalyzer = new SimpleAnalyzer();
+
+ public TokenStream tokenStream(String fieldName, Reader reader)
+ {
+ Analyzer analyzer = (Analyzer)textProperties.get(fieldName);
+ if (analyzer != null) {
+ return analyzer.tokenStream(fieldName, reader);
+ } else {
+ // TODO should not happen, throw an exception?
+ return this.defaultAnalyzer.tokenStream(fieldName, reader);
+ }
}
}
}
1.2 +10 -7 jakarta-slide/src/stores/org/apache/slide/index/lucene/LuceneExpressionFactory.java
Index: LuceneExpressionFactory.java
===================================================================
RCS file: /home/cvs/jakarta-slide/src/stores/org/apache/slide/index/lucene/LuceneExpressionFactory.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- LuceneExpressionFactory.java 18 Oct 2004 09:27:36 -0000 1.1
+++ LuceneExpressionFactory.java 22 Oct 2004 15:14:45 -0000 1.2
@@ -36,6 +36,7 @@
import org.apache.slide.index.lucene.expressions.LtExpression;
import org.apache.slide.index.lucene.expressions.MergeExpression;
import org.apache.slide.index.lucene.expressions.PropcontainsExpression;
+import org.apache.slide.index.lucene.expressions.PropertyContainsExpression;
import org.apache.slide.search.BadQueryException;
import org.apache.slide.search.PropertyProvider;
import org.apache.slide.search.basic.BasicExpressionFactory;
@@ -148,20 +149,20 @@
}
else if (name.equals(Literals.ISDEFINED)) {
Element property = AbstractExpression.getPropertyElement(e);
- if (index.getConfiguration().supportsIsDefined(property)) {
+ if (index.getConfiguration().supportsIsDefined(property.getNamespaceURI(), property.getName())) {
return new IsDefinedExpression(this.index, e, false);
}
}
else if (name.equals(Literals.NOT_ISDEFINED)) {
Element property = AbstractExpression.getPropertyElement(e);
- if (index.getConfiguration().supportsIsDefined(property)) {
+ if (index.getConfiguration().supportsIsDefined(property.getNamespaceURI(), property.getName())) {
return new IsDefinedExpression(this.index, e, true);
}
}
return super.createExpression(e);
}
- private IBasicExpression createSlideExpression (Element e)
+ private IBasicExpression createSlideExpression(Element e)
throws BadQueryException
{
String name = e.getName();
@@ -181,7 +182,9 @@
if (name.equals("between-inclusive")) {
return new BetweenExpression(this.index, e, true);
}
-
+ if (name.equals("property-contains")) {
+ return new PropertyContainsExpression(this.index, e);
+ }
return super.createExpression(e);
}
1.2 +36 -142 jakarta-slide/src/stores/org/apache/slide/index/lucene/LucenePropertiesIndexer.java
Index: LucenePropertiesIndexer.java
===================================================================
RCS file: /home/cvs/jakarta-slide/src/stores/org/apache/slide/index/lucene/LucenePropertiesIndexer.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- LucenePropertiesIndexer.java 18 Oct 2004 09:27:36 -0000 1.1
+++ LucenePropertiesIndexer.java 22 Oct 2004 15:14:45 -0000 1.2
@@ -24,13 +24,10 @@
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Date;
import java.util.Enumeration;
-import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
-import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
@@ -43,11 +40,6 @@
import org.apache.commons.transaction.util.xa.AbstractXAResource;
import org.apache.commons.transaction.util.xa.TransactionalResource;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
import org.apache.slide.common.AbstractServiceBase;
import org.apache.slide.common.NamespaceAccessToken;
import org.apache.slide.common.ServiceAccessException;
@@ -82,24 +74,22 @@
private static final String INCLUDES = "includes";
- private static final String ANALYZER = "analyzer";
-
- public static final String URI_FIELD = "uri";
-
public static final String CONTENT_TEXT = "content";
+
+ private static final String USER_PROPERTIES_CONFIG = "user-properties-config";
//------------------
private String indexpath = "";
-
private Collection includes;
-
- private String analyzerClassName;
-
private boolean started = false;
+ private String userConfigName;
protected Index index = null;
+ /**
+ * XAResource implementation to delegate all XAResource requests to.
+ */
private XAResourceImpl xaResource = null;
/**
@@ -120,10 +110,14 @@
this.xaResource = new XAResourceImpl(txLogger);
try {
- this.index = new Index(this.indexpath, initAnalyzer(), getLogger());
+ this.index = new Index(this.indexpath, getLogger());
+ if (this.userConfigName != null) {
+ info("loading user properties config: {0}", this.userConfigName);
+ this.index.configuration.loadConfigurationFromResource(this.userConfigName);
+ }
}
- catch (org.apache.slide.index.lucene.IndexException e) {
- throw new ServiceInitializationFailedException(this, e);
+ catch (IndexException e) {
+ throw new ServiceInitializationFailedException(this, e);
}
}
@@ -142,11 +136,8 @@
debug("createIndex {0} {1}", uri, revisionDescriptor.getRevisionNumber());
TransactionalIndexResource indexResource = xaResource.getCurrentTxn();
-
- Document doc = createLuceneDocument(uri, revisionDescriptor);
-
- indexResource.addIndexJob(uri,
- revisionDescriptor.getRevisionNumber(), doc);
+ indexResource.addIndexJob(uri, revisionDescriptor.getRevisionNumber(),
+ revisionDescriptor);
}
@@ -167,20 +158,19 @@
NodeRevisionDescriptor revisionDescriptor,
NodeRevisionContent revisionContent) throws IndexException
{
- debug("updateIndex {0} {1}", uri, revisionDescriptor
- .getRevisionNumber());
+ debug("updateIndex {0} {1}", uri, revisionDescriptor.getRevisionNumber());
boolean needsUpdate = false;
for(Enumeration e = revisionDescriptor.enumerateUpdatedProperties();e.hasMoreElements();) {
NodeProperty property = (NodeProperty)e.nextElement();
- if(index.getConfiguration().isIndexedProperty(property)) {
+ if(index.getConfiguration().isIndexedProperty(property.getNamespace(), property.getName())) {
needsUpdate = true;
break;
}
}
for(Enumeration e = revisionDescriptor.enumerateUpdatedProperties();!needsUpdate && e.hasMoreElements();) {
NodeProperty property = (NodeProperty)e.nextElement();
- if(index.getConfiguration().isIndexedProperty(property)) {
+ if(index.getConfiguration().isIndexedProperty(property.getNamespace(), property.getName())) {
needsUpdate = true;
break;
}
@@ -188,11 +178,8 @@
if (needsUpdate) {
TransactionalIndexResource indexResource = xaResource.getCurrentTxn();
-
- Document doc = createLuceneDocument(uri, revisionDescriptor);
-
- indexResource.addRemoveJob(uri, revisionDescriptor.getRevisionNumber());
- indexResource.addIndexJob(uri, revisionDescriptor.getRevisionNumber(), doc);
+ indexResource.addUpdateJob(uri,
+ revisionDescriptor.getRevisionNumber(), revisionDescriptor);
}
}
@@ -209,76 +196,9 @@
debug("dropIndex {0} {1}", uri, number);
TransactionalIndexResource indexResource = xaResource.getCurrentTxn();
-
indexResource.addRemoveJob(uri, number);
}
- private Field unstoredString(String fieldName, String value) {
- return new Field(fieldName, value, false, true, false);
- }
- private Field storedString(String fieldName, String value) {
- return new Field(fieldName, value, true, true, false);
- }
- private Document createLuceneDocument(Uri uri, NodeRevisionDescriptor descriptor) {
-
- Document doc = new Document();
-
- IndexConfiguration config = index.getConfiguration();
-
- doc.add(unstoredString(Index.KEY_FIELD_NAME,
- config.generateKey(uri, descriptor.getRevisionNumber())));
- doc.add(storedString(Index.URI_FIELD_NAME, uri.toString()));
- for(Enumeration e = uri.getScopes(); e.hasMoreElements();) {
- doc.add(unstoredString(Index.SCOPE_FIELD_NAME, e.nextElement().toString()));
- }
-
- doc.add(unstoredString(Index.VERSION_FIELD_NAME,
- descriptor.getRevisionNumber().toString()));
-
- String rtype = descriptor.getResourceType();
- if (rtype.indexOf("collection") != -1) {
- doc.add(unstoredString(config.generateFieldName(
- NodeProperty.DEFAULT_NAMESPACE, "resourcetype"),
- "collection"));
- if (rtype.indexOf("principal") != -1) {
- doc.add(unstoredString(config.generateFieldName(
- NodeProperty.DEFAULT_NAMESPACE, "resourcetype"),
- "principal"));
- }
- }
-
- for(Enumeration e = descriptor.enumerateProperties(); e.hasMoreElements();) {
- NodeProperty property = (NodeProperty)e.nextElement();
- Object value = property.getValue();
-
- if (value == null) continue;
-
- if (config.isKeywordProperty(property)) {
- doc.add(unstoredString(config.generateFieldName(property), value.toString()));
- }
- if (config.isDateProperty(property)) {
- Date date = config.getDateValue(value);
- if (date != null) {
- doc.add(unstoredString(config.generateFieldName(property),
- config.dateToIndexString(date)));
- }
- }
- if (config.isIntProperty(property)) {
- try {
- doc.add(unstoredString(config.generateFieldName(property),
- config.intToIndexString(Long.parseLong(value.toString()))));
- } catch (NumberFormatException ex) {
- // TODO log warning
- }
- }
- if (config.supportsIsDefined(property)) {
- doc.add(unstoredString(Index.IS_DEFINED_FIELD_NAME,
- config.generateFieldName(property)));
- }
- }
-
- return doc;
- }
@@ -350,7 +270,9 @@
this.includes.add(tokenizer.nextToken());
}
}
- analyzerClassName = (String) parameters.get(ANALYZER);
+
+ userConfigName = (String)parameters.get(USER_PROPERTIES_CONFIG);
+
}
/**
@@ -398,38 +320,6 @@
return false;
}
- protected Analyzer initAnalyzer() throws ServiceInitializationFailedException
- {
-
- if (analyzerClassName == null || analyzerClassName.length() == 0) {
- info("using Lucene StandardAnalyzer");
- return new StandardAnalyzer();
-
- } else {
- info("using Lucene analyzer: {0}", analyzerClassName);
-
- try {
- Class analyzerClazz = Class.forName(analyzerClassName);
- return (Analyzer) analyzerClazz.newInstance();
-
- } catch (ClassNotFoundException e) {
- error("Error while instantiating analyzer {0} {1}",
- analyzerClassName, e.getMessage());
- throw new ServiceInitializationFailedException(this, e);
-
- } catch (InstantiationException e) {
- error("Error while instantiating analyzer {0} {1}",
- analyzerClassName, e.getMessage());
- throw new ServiceInitializationFailedException(this, e);
-
- } catch (IllegalAccessException e) {
- error("Error while instantiating analyzer {0} {1}",
- analyzerClassName, e.getMessage());
- throw new ServiceInitializationFailedException(this, e);
- }
- }
- }
-
// -------------------------------------------------------------------------
// XAResource interface, all request are deletgated to this.xaResource
@@ -541,7 +431,7 @@
AbstractTransactionalResource
{
private Index idx;
- private Map indexJobs = new HashMap();
+ private Set indexJobs = new HashSet();
private Set removeJobs = new HashSet();
TransactionalIndexResource(Xid xid, Index index)
@@ -550,14 +440,18 @@
this.idx = index;
}
- void addIndexJob(Uri uri, NodeRevisionNumber version, Document doc) {
- String key = idx.getConfiguration().generateKey(uri, version);
- this.indexJobs.put(key, doc);
+ void addIndexJob(Uri uri, NodeRevisionNumber version, NodeRevisionDescriptor descriptor) {
+ this.indexJobs.add(this.idx.createIndexJob(uri, version, descriptor));
}
void addRemoveJob(Uri uri, NodeRevisionNumber version) {
- String key = idx.getConfiguration().generateKey(uri, version);
- this.indexJobs.remove(key);
- this.removeJobs.add(key);
+ Index.IndexJob job = idx.createDeleteJob(uri, version);
+ this.indexJobs.remove(job);
+ this.removeJobs.add(job);
+ }
+ void addUpdateJob(Uri uri, NodeRevisionNumber version, NodeRevisionDescriptor descriptor) {
+ Index.IndexJob job = idx.createIndexJob(uri, version, descriptor);
+ this.removeJobs.add(job);
+ this.indexJobs.add(job);
}
public void begin() throws XAException
@@ -567,8 +461,8 @@
public void commit() throws XAException
{
try {
- this.idx.addIndexJob(this.indexJobs, this.removeJobs);
- } catch (IOException e) {
+ this.idx.executeIndexTransaction(this.removeJobs, this.indexJobs);
+ } catch (IndexException e) {
throw new XAException(e.toString());
}
}
1.1 jakarta-slide/src/stores/org/apache/slide/index/lucene/properties.xml
Index: properties.xml
===================================================================
<indexed-properties xmlns:d="DAV:" xmlns:s="http://jakarta.apache.org/slide/">
<d:displayname>
<string/><is-defined/>
</d:displayname>
<d:getcontenttype>
<string/><is-defined/>
</d:getcontenttype>
<d:getcontentlanguage>
<string/><is-defined/>
</d:getcontentlanguage>
<d:getcontentlength>
<integer/>
</d:getcontentlength>
<d:getlastmodified>
<date/>
</d:getlastmodified>
<d:creationdate>
<date/>
</d:creationdate>
<d:modificationdate>
<date/>
</d:modificationdate>
<d:owner>
<string/><is-defined/>
</d:owner>
<d:modificationuser>
<string/><is-defined/>
</d:modificationuser>
<d:checked-in>
<string/><is-defined/>
</d:checked-in>
<d:checked-out>
<string/><is-defined/>
</d:checked-out>
</indexed-properties>
1.2 +6 -6 jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/BetweenExpression.java
Index: BetweenExpression.java
===================================================================
RCS file: /home/cvs/jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/BetweenExpression.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- BetweenExpression.java 18 Oct 2004 09:27:36 -0000 1.1
+++ BetweenExpression.java 22 Oct 2004 15:14:46 -0000 1.2
@@ -61,19 +61,19 @@
IndexConfiguration config = index.getConfiguration();
Element prop = getPropertyElement(element);
- String field = config.generateFieldName(prop);
+ String field = config.generateFieldName(prop.getNamespaceURI(), prop.getName());
Element literal1 = getLiteralElement(element);
Element literal2 = getLiteral2Element(element);
String value1;
String value2;
- if (index.getConfiguration().isDateProperty(prop)) {
+ if (index.getConfiguration().isDateProperty(prop.getNamespaceURI(), prop.getName())) {
value1 = config.dateToIndexString(
config.getDateValue(literal1.getTextTrim()));
value2 = config.dateToIndexString(
config.getDateValue(literal2.getTextTrim()));
}
- else if (index.getConfiguration().isIntProperty(prop)) {
+ else if (index.getConfiguration().isIntProperty(prop.getNamespaceURI(), prop.getName())) {
value1 = config.intToIndexString(Long.parseLong(literal1.getTextTrim()));
value2 = config.intToIndexString(Long.parseLong(literal2.getTextTrim()));
}
1.2 +6 -6 jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/EqExpression.java
Index: EqExpression.java
===================================================================
RCS file: /home/cvs/jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/EqExpression.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- EqExpression.java 18 Oct 2004 09:27:36 -0000 1.1
+++ EqExpression.java 22 Oct 2004 15:14:46 -0000 1.2
@@ -43,14 +43,14 @@
IndexConfiguration config = index.getConfiguration();
Element prop = getPropertyElement(element);
- String field = config.generateFieldName(prop);
+ String field = config.generateFieldName(prop.getNamespaceURI(), prop.getName());
Element literal = getLiteralElement(element);
String value;
- if (index.getConfiguration().isDateProperty(prop)) {
+ if (index.getConfiguration().isDateProperty(prop.getNamespaceURI(), prop.getName())) {
value = config.dateToIndexString(config.getDateValue(literal.getTextTrim()));
}
- else if (index.getConfiguration().isIntProperty(prop)) {
+ else if (index.getConfiguration().isIntProperty(prop.getNamespaceURI(), prop.getName())) {
value = config.intToIndexString(Long.parseLong(literal.getTextTrim()));
}
else {
1.2 +6 -6 jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/GtExpression.java
Index: GtExpression.java
===================================================================
RCS file: /home/cvs/jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/GtExpression.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- GtExpression.java 18 Oct 2004 09:27:36 -0000 1.1
+++ GtExpression.java 22 Oct 2004 15:14:46 -0000 1.2
@@ -44,17 +44,17 @@
IndexConfiguration config = index.getConfiguration();
Element prop = getPropertyElement(element);
- String field = config.generateFieldName(prop);
+ String field = config.generateFieldName(prop.getNamespaceURI(), prop.getName());
Element literal = getLiteralElement(element);
String value;
String upperBound;
- if (index.getConfiguration().isDateProperty(prop)) {
+ if (index.getConfiguration().isDateProperty(prop.getNamespaceURI(), prop.getName())) {
Date date = config.getDateValue(literal.getTextTrim());
value = config.dateToIndexString(date);
upperBound = Index.DATE_UPPER_BOUND;
}
- else if (index.getConfiguration().isIntProperty(prop)) {
+ else if (index.getConfiguration().isIntProperty(prop.getNamespaceURI(), prop.getName())) {
value = config.intToIndexString(Long.parseLong(literal.getTextTrim()));
upperBound = Index.INT_UPPER_BOUND;
}
1.2 +5 -5 jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/IsDefinedExpression.java
Index: IsDefinedExpression.java
===================================================================
RCS file: /home/cvs/jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/IsDefinedExpression.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- IsDefinedExpression.java 18 Oct 2004 09:27:36 -0000 1.1
+++ IsDefinedExpression.java 22 Oct 2004 15:14:46 -0000 1.2
@@ -44,10 +44,10 @@
IndexConfiguration config = index.getConfiguration();
Element prop = getPropertyElement(element);
- String field = config.generateFieldName(prop);
+ String field = config.generateFieldName(prop.getNamespaceURI(), prop.getName());
- if (config.supportsIsDefined(prop)) {
+ if (config.supportsIsDefined(prop.getNamespaceURI(), prop.getName())) {
setQuery(new TermQuery(new Term(Index.IS_DEFINED_FIELD_NAME, field)));
}
1.2 +4 -4 jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/LikeExpression.java
Index: LikeExpression.java
===================================================================
RCS file: /home/cvs/jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/LikeExpression.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- LikeExpression.java 18 Oct 2004 09:27:36 -0000 1.1
+++ LikeExpression.java 22 Oct 2004 15:14:46 -0000 1.2
@@ -46,7 +46,7 @@
IndexConfiguration config = index.getConfiguration();
Element prop = getPropertyElement(element);
- String field = config.generateFieldName(prop);
+ String field = config.generateFieldName(prop.getNamespaceURI(), prop.getName());
Element literal = getLiteralElement(element);
String text = literal.getTextTrim();
1.2 +6 -6 jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/LtExpression.java
Index: LtExpression.java
===================================================================
RCS file: /home/cvs/jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/LtExpression.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- LtExpression.java 18 Oct 2004 09:27:36 -0000 1.1
+++ LtExpression.java 22 Oct 2004 15:14:46 -0000 1.2
@@ -46,12 +46,12 @@
IndexConfiguration config = index.getConfiguration();
Element prop = getPropertyElement(element);
- String field = config.generateFieldName(prop);
+ String field = config.generateFieldName(prop.getNamespaceURI(), prop.getName());
Element literal = getLiteralElement(element);
String value;
String lowerBound;
- if (index.getConfiguration().isDateProperty(prop)) {
+ if (index.getConfiguration().isDateProperty(prop.getNamespaceURI(), prop.getName())) {
Date date = config.getDateValue(literal.getTextTrim());
Calendar c = Calendar.getInstance();
c.setTime(date);
@@ -59,7 +59,7 @@
value = config.dateToIndexString(date);
lowerBound = Index.DATE_LOWER_BOUND;
}
- else if (index.getConfiguration().isIntProperty(prop)) {
+ else if (index.getConfiguration().isIntProperty(prop.getNamespaceURI(), prop.getName())) {
value = config.intToIndexString(Long.parseLong(literal.getTextTrim()));
lowerBound = Index.INT_LOWER_BOUND;
}
1.2 +7 -5 jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/PropcontainsExpression.java
Index: PropcontainsExpression.java
===================================================================
RCS file: /home/cvs/jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/PropcontainsExpression.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- PropcontainsExpression.java 18 Oct 2004 09:27:36 -0000 1.1
+++ PropcontainsExpression.java 22 Oct 2004 15:14:46 -0000 1.2
@@ -32,7 +32,9 @@
/**
- * Implements the <code>propcontains</code> operator.
+ * Implements the <code>propcontains</code> operator.
+ *
+ * <p>This expression is actually a <code>substring</code> expression.
*
*/
public class PropcontainsExpression extends AbstractExpression
@@ -44,7 +46,7 @@
IndexConfiguration config = index.getConfiguration();
Element prop = getPropertyElement(element);
- String field = config.generateFieldName(prop);
+ String field = config.generateFieldName(prop.getNamespaceURI(), prop.getName());
Element literal = getLiteralElement(element);
String text = literal.getTextTrim();
1.1 jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/PropertyContainsExpression.java
Index: PropertyContainsExpression.java
===================================================================
// vi: set ts=3 sw=3:
package org.apache.slide.index.lucene.expressions;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.slide.index.lucene.Index;
import org.apache.slide.index.lucene.IndexConfiguration;
import org.apache.slide.search.BadQueryException;
import org.jdom.Element;
/**
 * Implements the <code>property-contains</code> expression, that works
 * exactly like <code>contains</code> but on properties.
 *
 * <p>The literal of the expression is translated into a Lucene query as
 * follows:
 * <ul>
 * <li>a literal whose only wildcard is a single trailing <code>*</code>
 *     becomes a <code>PrefixQuery</code>,</li>
 * <li>any other literal containing <code>*</code> or <code>?</code>
 *     becomes a <code>WildcardQuery</code>,</li>
 * <li>a literal without wildcards is run through the configured analyzer
 *     and becomes a <code>TermQuery</code> (one token) or a
 *     <code>PhraseQuery</code> (several tokens).</li>
 * </ul>
 */
public class PropertyContainsExpression extends AbstractExpression
{

   /**
    * Creates the expression and the Lucene query for the given
    * <code>property-contains</code> element.
    *
    * @param index the index this expression is evaluated against; its
    *              configuration supplies the field name and the analyzer
    * @param element the expression element holding the property and the
    *              literal
    * @throws BadQueryException if the literal can not be analyzed (or if
    *              one of the inherited element accessors rejects the
    *              element)
    */
   public PropertyContainsExpression(Index index, Element element)
      throws BadQueryException
   {
      super(index);

      IndexConfiguration config = index.getConfiguration();
      Element prop = getPropertyElement(element);
      String field = config.generateFieldName(prop.getNamespaceURI(),
            prop.getName());
      String literal = getLiteralElement(element).getText();

      int starPos = literal.indexOf('*');
      int qmPos = literal.indexOf('?');

      if (starPos != -1 || qmPos != -1) {
         // Wildcard literals bypass the analyzer, so they are lower-cased
         // by hand. An explicit locale keeps the result independent of the
         // JVM default locale (e.g. Turkish dotless i).
         // TODO the lower-casing should depend on the Analyzer
         if (starPos == literal.length()-1 && qmPos == -1) {
            // something like "word*"
            String prefix = literal.substring(0, literal.length()-1);
            setQuery(new PrefixQuery(new Term(field,
                  prefix.toLowerCase(java.util.Locale.ENGLISH))));
         } else {
            setQuery(new WildcardQuery(new Term(field,
                  literal.toLowerCase(java.util.Locale.ENGLISH))));
         }
      } else {
         termOrPhraseQuery(config, field, literal);
      }
   }

   /**
    * Analyzes <code>text</code> with the configured analyzer and sets a
    * term query (exactly one token), a phrase query (more than one token)
    * or an empty boolean query (no token at all).
    *
    * @param config the configuration providing the analyzer
    * @param field the name of the index field to search in
    * @param text the literal to be tokenized
    * @throws BadQueryException if the analyzer fails to tokenize the text
    */
   private void termOrPhraseQuery(IndexConfiguration config, String field,
         String text) throws BadQueryException
   {
      TokenStream ts = config.getAnalyzer().tokenStream(field,
            new StringReader(text));

      ArrayList tokens = new ArrayList(20);
      try {
         try {
            for (Token t = ts.next(); t != null; t = ts.next()) {
               tokens.add(t.termText());
            }
         } finally {
            // release the analyzer resources even if tokenizing failed
            ts.close();
         }
      } catch (IOException e) {
         // Not expected when reading from a StringReader, but a partially
         // tokenized literal must not silently end up as a query.
         throw new BadQueryException(
               "Could not analyze literal of property-contains expression: "
               + e);
      }

      if (tokens.size() > 1) {
         PhraseQuery phraseQuery = new PhraseQuery();
         for (int i = 0, l = tokens.size(); i < l; i++) {
            phraseQuery.add(new Term(field, (String)tokens.get(i)));
         }
         setQuery(phraseQuery);
      } else if (tokens.size() == 1) {
         setQuery(new TermQuery(new Term(field, (String)tokens.get(0))));
      } else {
         // The analyzer removed everything (e.g. empty literal).
         // TODO NOP query???
         setQuery(new BooleanQuery());
      }
   }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: slide-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: slide-dev-help@jakarta.apache.org