You are viewing a plain text version of this content. The canonical link for it is here.
Posted to slide-dev@jakarta.apache.org by ja...@apache.org on 2004/11/12 12:43:55 UTC

[Jakarta-Slide Wiki] Updated: DaslConfiguration

   Date: 2004-11-12T03:43:55
   Editor: StefanLützkendorf <lu...@apache.org>
   Wiki: Jakarta-Slide Wiki
   Page: DaslConfiguration
   URL: http://wiki.apache.org/jakarta-slide/DaslConfiguration

   no comment

Change Log:

------------------------------------------------------------------------------
@@ -3,7 +3,9 @@
 == RDBMS ==
 TODO
 
-== Lucene based properties indexer ==
+== Searching meta-data with the Lucene based properties indexer ==
+
+'''Note''': this is under development and will be part of Slide 2.2. To try it out, you can use CVS HEAD.
 
 Searching the meta data.
 
@@ -13,7 +15,7 @@
 
  {{{
   <propertiesindexer classname="org.apache.slide.index.lucene.LucenePropertiesIndexer">
-       <parameter name="indexpath">txfile/store1/index</parameter>
+       <parameter name="indexpath">store/index/metadata</parameter>
   </propertiesindexer>
  }}}
 
@@ -22,6 +24,9 @@
 || '''parameter''' || '''description''' || '''required/default''' ||
 || indexpath       || directory where the index data is stored || true/none ||
 || asynchron       || If set to ''false'' the index is updated inside the transaction. If set to ''true'' the index is updated on a separate thread. So the transaction can be finished ''before'' the index is updated. || no/false ||
+|| priority        || Priority of the indexing thread if ''asynchron'' is ''true''. Must be a value between Thread.MIN_PRIORITY  and Thread.MAX_PRIORITY  || no/Thread.NORM_PRIORITY  ||
+|| includes || A comma-separated list of paths for which indexing should happen. If empty, everything in the store is indexed || no||
+|| optimization-threshold || The number of write accesses to the index after which the index is optimized || no/100 ||
 
 === supported DASL operators and data types ===
 
@@ -34,6 +39,7 @@
 {{{integer}}} indexed as a normalized integer string (between Long.MIN_VALUE and Long.MAX_VALUE)
 
 {{{text}}}    indexed in a tokenized and normalized form (normalized using Lucene analyzers)
+
  
 ||                   || string || date || integer || text ||
 ||eq                 ||    *   ||  *   ||    *    ||   -  ||
@@ -63,66 +69,36 @@
 
 To reduce the indexing overhead, not all properties are indexed by default. For properties that are not indexed the default search implementation will be called.
 
-'''default index configuration'''
- {{{
-<indexed-properties xmlns:d="DAV:" xmlns:s="http://jakarta.apache.org/slide/">
-  <d:displayname>
-    <string/><is-defined/>
-  </d:displayname>
-  <d:getcontenttype>
-    <string/><is-defined/>
-  </d:getcontenttype>
-  <d:getcontentlanguage>
-    <string/><is-defined/>
-  </d:getcontentlanguage>
-  <d:getcontentlength>
-    <integer/>
-  </d:getcontentlength>
-
-  <d:getlastmodified>
-    <date/>
-  </d:getlastmodified>
-  <d:creationdate>
-    <date/>
-  </d:creationdate>
-
-  <d:owner>
-    <string/><is-defined/>
-  </d:owner>
-
-  <!-- they are updated quite often, e.g. for each collection where a member
-       is added ore removed
-  <d:modificationdate>
-    <date/>
-  </d:modificationdate>
-  <d:modificationuser>
-    <string/><is-defined/>
-  </d:modificationuser>
-  -->
-
-  <!-- TODO what about href properties -->
-  <d:checked-in>
-    <string/><is-defined/>
-  </d:checked-in>
-  <d:checked-out>
-    <string/><is-defined/>
-  </d:checked-out>
+By default the following properties are indexed:
 
-</indexed-properties>
-}}}
+|| '''namespace''' || '''property'''     || '''type''' ||
+|| DAV:            || displayname        || string     ||
+|| DAV:            || getcontenttype     || string     ||
+|| DAV:            || getcontentlanguage || string     ||
+|| DAV:            || getcontentlength   || integer    ||
+|| DAV:            || getlastmodified    || date       ||
+|| DAV:            || creationdate       || date       ||
 
 '''User defined text properties'''
+
+You can add additional properties to the indexing, including user defined properties.
+
+The following sample defines two user defined properties in the namespace ''http://any.domain/test/''. Both are text properties analyzed with different analyzers.
  {{{
-<indexed-properties xmlns:d="DAV:" xmlns:u="http://any.domain/test/">
-  <u:abstract>
-    <text analyzer="org.apache.lucene.analysis.de.GermanAnalyzer"/>
-    <is-defined/>
-  </u:abstract>
-  <u:keywords>
-    <text analyzer="org.apache.lucene.analysis.WhitespaceAnalyzer"/>
-    <is-defined/>
-  </u:keywords>
-</indexed-properties>}}}
+  <propertiesindexer classname="org.apache.slide.index.lucene.LucenePropertiesIndexer">
+    <parameter name="indexpath">${datapath}/store1/index/metadata</parameter>
+    <configuration name="indexed-properties">
+      <property name="abstract" namespace="http://any.domain/test/">
+        <text analyzer="org.apache.lucene.analysis.de.GermanAnalyzer"/>
+        <is-defined/>
+      </property>
+      <property name="keywords" namespace="http://any.domain/test/">
+        <text analyzer="org.apache.lucene.analysis.WhitespaceAnalyzer"/>
+        <is-defined/>
+      </property>
+    </configuration>
+  </propertiesindexer>
+}}}
 
 === Operators (extensions) ===
 '''Operator {{{property-contains}}}'''
@@ -172,3 +148,26 @@
    <D:prop><u:abstract/></D:prop>
    <D:literal>a longer phrase of text</D:literal>
  </S:property-contains>}}}
+
+== Searching content with the Lucene based content indexer ==
+
+=== Enabling ===
+
+To use this indexer add the following to your store definition.
+
+ {{{
+  <contentindexer classname="org.apache.slide.index.lucene.LuceneContentIndexer">
+       <parameter name="indexpath">store/index/content</parameter>
+  </contentindexer>
+ }}}
+
+Parameter
+ 
+|| '''parameter''' || '''description''' || '''required/default''' ||
+|| indexpath       || directory where the index data is stored || true/none ||
+|| asynchron       || If set to ''false'' the index is updated inside the transaction. If set to ''true'' the index is updated on a separate thread. So the transaction can be finished ''before'' the index is updated. || no/false ||
+|| priority        || Priority of the indexing thread if ''asynchron'' is ''true''. Must be a value between Thread.MIN_PRIORITY  and Thread.MAX_PRIORITY  || no/Thread.NORM_PRIORITY  ||
+|| includes || A comma-separated list of paths for which indexing should happen. If empty, everything in the store is indexed || no||
+|| optimization-threshold || The number of write accesses to the index after which the index is optimized || no/100 ||
+|| analyzer ||          ||    ||
+

---------------------------------------------------------------------
To unsubscribe, e-mail: slide-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: slide-dev-help@jakarta.apache.org