You are viewing a plain text version of this content. The canonical link for it is here.
Posted to by on 2013/06/05 09:23:15 UTC

svn commit: r1489728 [3/3] - in /stanbol/trunk/enhancement-engines/topic/engine: ./ src/main/java/org/apache/stanbol/enhancer/engine/topic/ src/main/java/org/apache/stanbol/enhancer/topic/ src/main/java/org/apache/stanbol/enhancer/topic/training/ src/m...

Modified: stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/default-topic-trainingset/conf/solrconfig.xml
--- stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/default-topic-trainingset/conf/solrconfig.xml (original)
+++ stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/default-topic-trainingset/conf/solrconfig.xml Wed Jun  5 07:23:15 2013
@@ -47,466 +47,69 @@
        that you fully re-index after changing this setting as it can
        affect both how text is indexed and queried.
-  <luceneMatchVersion>LUCENE_32</luceneMatchVersion>
+  <luceneMatchVersion>LUCENE_42</luceneMatchVersion>
-  <!-- lib directives can be used to instruct Solr to load an Jars
-       identified and use them to resolve any "plugins" specified in
-       your solrconfig.xml or schema.xml (ie: Analyzers, Request
-       Handlers, etc...).
-       All directories and paths are resolved relative to the
-       instanceDir.
-       If a "./lib" directory exists in your instanceDir, all files
-       found in it are included as if you had used the following
-       syntax...
-              <lib dir="./lib" />
-    -->
-  <!-- A dir option by itself adds any files found in the directory to
-       the classpath, this is useful for including all jars in a
-       directory.
-    -->
-  <lib dir="../../contrib/extraction/lib" />
-  <!-- When a regex is specified in addition to a directory, only the
-       files in that directory which completely match the regex
-       (anchored on both ends) will be included.
-    -->
-  <lib dir="../../dist/" regex="apache-solr-cell-\d.*\.jar" />
-  <lib dir="../../dist/" regex="apache-solr-clustering-\d.*\.jar" />
-  <lib dir="../../dist/" regex="apache-solr-dataimporthandler-\d.*\.jar" />
-  <!-- If a dir option (with or without a regex) is used and nothing
-       is found that matches, it will be ignored
-    -->
-  <lib dir="../../contrib/clustering/lib/" />
-  <lib dir="/total/crap/dir/ignored" /> 
-  <!-- an exact path can be used to specify a specific file.  This
-       will cause a serious error to be logged if it can't be loaded.
-    -->
-  <!--
-  <lib path="../a-jar-that-does-not-exist.jar" /> 
-  -->
-  <!-- Data Directory
-       Used to specify an alternate directory to hold all index data
-       other than the default ./data under the Solr home.  If
-       replication is in use, this should match the replication
-       configuration.
-    -->
-  <!-- The DirectoryFactory to use for indexes.
-       solr.StandardDirectoryFactory, the default, is filesystem
-       based.  solr.RAMDirectoryFactory is memory based, not
-       persistent, and doesn't work with replication.
-    -->
   <directoryFactory name="DirectoryFactory" 
-                    class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>
-  <!-- Index Defaults
-       Values here affect all index writers and act as a default
-       unless overridden.
-       WARNING: See also the <mainIndex> section below for parameters
-       that overfor Solr's main Lucene index.
-    -->
-  <indexDefaults>
-    <useCompoundFile>false</useCompoundFile>
-    <mergeFactor>10</mergeFactor>
-    <!-- Sets the amount of RAM that may be used by Lucene indexing
-         for buffering added documents and deletions before they are
-         flushed to the Directory.  -->
-    <ramBufferSizeMB>32</ramBufferSizeMB>
-    <!-- If both ramBufferSizeMB and maxBufferedDocs is set, then
-         Lucene will flush based on whichever limit is hit first.  
-      -->
-    <!-- <maxBufferedDocs>1000</maxBufferedDocs> -->
-    <maxFieldLength>100000</maxFieldLength>
-    <writeLockTimeout>1000</writeLockTimeout>
-    <commitLockTimeout>10000</commitLockTimeout>
-    <!-- Expert: Merge Policy 
-         The Merge Policy in Lucene controls how merging is handled by
-         Lucene.  The default in Solr 3.3 is TieredMergePolicy.
-         The default in 2.3 was the LogByteSizeMergePolicy,
-         previous versions used LogDocMergePolicy.
-         LogByteSizeMergePolicy chooses segments to merge based on
-         their size.  The Lucene 2.2 default, LogDocMergePolicy chose
-         when to merge based on number of documents
-         Other implementations of MergePolicy must have a no-argument
-         constructor
-      -->
-    <!--
-       <mergePolicy class="org.apache.lucene.index.TieredMergePolicy"/>
-       -->
-    <!-- Expert: Merge Scheduler
-         The Merge Scheduler in Lucene controls how merges are
-         performed.  The ConcurrentMergeScheduler (Lucene 2.3 default)
-         can perform merges in the background using separate threads.
-         The SerialMergeScheduler (Lucene 2.2 default) does not.
-     -->
-    <!-- 
-       <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
-       -->
-    <!-- LockFactory 
-         This option specifies which Lucene LockFactory implementation
-         to use.
-         single = SingleInstanceLockFactory - suggested for a
-                  read-only index or when there is no possibility of
-                  another process trying to modify the index.
-         native = NativeFSLockFactory - uses OS native file locking.
-                  Do not use when multiple solr webapps in the same
-                  JVM are attempting to share a single index.
-         simple = SimpleFSLockFactory  - uses a plain file for locking
-         (For backwards compatibility with Solr 1.2, 'simple' is the
-         default if not specified.)
-         More details on the nuances of each LockFactory...
-    -->
-    <lockType>native</lockType>
-    <!-- Expert: Controls how often Lucene loads terms into memory
-         Default is 128 and is likely good for most everyone.
-      -->
-    <!-- <termIndexInterval>256</termIndexInterval> -->
-  </indexDefaults>
-  <!-- Main Index
+                    class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> 
-       Values here override the values in the <indexDefaults> section
-       for the main on disk index.
-    -->
-  <mainIndex>
-    <useCompoundFile>false</useCompoundFile>
-    <ramBufferSizeMB>32</ramBufferSizeMB>
-    <mergeFactor>10</mergeFactor>
-    <!-- Unlock On Startup
-         If true, unlock any held write or commit locks on startup.
-         This defeats the locking mechanism that allows multiple
-         processes to safely access a lucene index, and should be used
-         with care.
-         This is not needed if lock type is 'none' or 'single'
-     -->
-    <unlockOnStartup>false</unlockOnStartup>
-    <!-- If true, IndexReaders will be reopened (often more efficient)
-         instead of closed and then opened.
-      -->
-    <reopenReaders>true</reopenReaders>
-    <!-- Commit Deletion Policy
-         Custom deletion policies can specified here. The class must
-         implement org.apache.lucene.index.IndexDeletionPolicy.
-         The standard Solr IndexDeletionPolicy implementation supports
-         deleting index commit points on number of commits, age of
-         commit point and optimized status.
-         The latest commit point should always be preserved regardless
-         of the criteria.
-    -->
-    <deletionPolicy class="solr.SolrDeletionPolicy">
-      <!-- The number of commit points to be kept -->
-      <str name="maxCommitsToKeep">1</str>
-      <!-- The number of optimized commit points to be kept -->
-      <str name="maxOptimizedCommitsToKeep">0</str>
-      <!--
-          Delete all commit points once they have reached the given age.
-          Supports DateMathParser syntax e.g.
-        -->
-      <!--
-         <str name="maxCommitAge">30MINUTES</str>
-         <str name="maxCommitAge">1DAY</str>
-      -->
-    </deletionPolicy>
-    <!-- Lucene Infostream
-         To aid in advanced debugging, Lucene provides an "InfoStream"
-         of detailed information when indexing.
-         Setting The value to true will instruct the underlying Lucene
-         IndexWriter to write its debugging info the specified file
-      -->
-     <infoStream file="INFOSTREAM.txt">false</infoStream> 
-  </mainIndex>
+  <indexConfig>
+    <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a 
+         LimitTokenCountFilterFactory in your fieldType definition. E.g. 
+     <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
+    -->
+    <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
+    <writeLockTimeout>5000</writeLockTimeout>
+ </indexConfig>
+ <jmx />
+   <!-- The default high-performance update handler -->
-  <!-- JMX
-       This example enables JMX if and only if an existing MBeanServer
-       is found, use this if you want to configure JMX through JVM
-       parameters. Remove this to disable exposing Solr configuration
-       and statistics to JMX.
-       For more details see
-    -->
-  <jmx />
-  <!-- If you want to connect to a particular server, specify the
-       agentId 
-    -->
-  <!-- <jmx agentId="myAgent" /> -->
-  <!-- If you want to start a new MBeanServer, specify the serviceUrl -->
-  <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
-    -->
-  <!-- The default high-performance update handler -->
   <updateHandler class="solr.DirectUpdateHandler2">
-    <!-- AutoCommit
-         Perform a <commit/> automatically under certain conditions.
-         Instead of enabling autoCommit, consider using "commitWithin"
-         when adding documents. 
-         maxDocs - Maximum number of documents to add since the last
-                   commit before automatically triggering a new commit.
-         maxTime - Maximum amount of time that is allowed to pass
-                   since a document was added before automaticly
-                   triggering a new commit. 
-      -->
-    <!--
-       <autoCommit> 
-         <maxDocs>10000</maxDocs>
+    <!-- Deactivate transaction log 
+    <updateLog>
+      <str name="dir">${solr.ulog.dir:}</str>
+    </updateLog > -->
+    <!-- no auto commit
+     <autoCommit> 
+       <maxTime>15000</maxTime> 
+       <openSearcher>false</openSearcher> 
+     </autoCommit>
+     -->
+     <!--
+       <autoSoftCommit> 
-       </autoCommit>
-      -->
-    <!-- Update Related Event Listeners
-         Various IndexWriter related events can trigger Listeners to
-         take actions.
-         postCommit - fired after every commit or optimize command
-         postOptimize - fired after every optimize command
-      -->
-    <!-- The RunExecutableListener executes an external command from a
-         hook such as postCommit or postOptimize.
-         exe - the name of the executable to run
-         dir - dir to use as the current working directory. (default=".")
-         wait - the calling thread waits until the executable returns. 
-                (default="true")
-         args - the arguments to pass to the program.  (default is none)
-         env - environment variables to set.  (default is none)
-      -->
-    <!-- This example shows how RunExecutableListener could be used
-         with the script based replication...
-      -->
-    <!--
-       <listener event="postCommit" class="solr.RunExecutableListener">
-         <str name="exe">solr/bin/snapshooter</str>
-         <str name="dir">.</str>
-         <bool name="wait">true</bool>
-         <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
-         <arr name="env"> <str>MYVAR=val1</str> </arr>
-       </listener>
+       </autoSoftCommit>
-  <!-- IndexReaderFactory
-       Use the following format to specify a custom IndexReaderFactory,
-       which allows for alternate IndexReader implementations.
-       ** Experimental Feature **
-       Please note - Using a custom IndexReaderFactory may prevent
-       certain other features from working. The API to
-       IndexReaderFactory may change without warning or may even be
-       removed from future releases if the problems cannot be
-       resolved.
-       ** Features that may not work with custom IndexReaderFactory **
-       The ReplicationHandler assumes a disk-resident index. Using a
-       custom IndexReader implementation may cause incompatibility
-       with ReplicationHandler and may cause replication to not work
-       correctly. See SOLR-1366 for details.
-    -->
-  <!--
-  <indexReaderFactory name="IndexReaderFactory" class="package.class">
-    <str name="someArg">Some Value</str>
-  </indexReaderFactory >
-  -->
-  <!-- By explicitly declaring the Factory, the termIndexDivisor can
-       be specified.
-    -->
-  <!--
-     <indexReaderFactory name="IndexReaderFactory" 
-                         class="solr.StandardIndexReaderFactory">
-       <int name="setTermIndexDivisor">12</int>
-     </indexReaderFactory >
-    -->
-    <!-- Max Boolean Clauses
-         Maximum number of clauses in each BooleanQuery,  an exception
-         is thrown if exceeded.
-         ** WARNING **
-         This option actually modifies a global Lucene property that
-         will affect all SolrCores.  If multiple solrconfig.xml files
-         disagree on this property, the value at any given moment will
-         be based on the last SolrCore to be initialized.
-      -->
-    <!-- Solr Internal Query Caches
-         There are two implementations of cache available for Solr,
-         LRUCache, based on a synchronized LinkedHashMap, and
-         FastLRUCache, based on a ConcurrentHashMap.  
-         FastLRUCache has faster gets and slower puts in single
-         threaded operation and thus is generally faster than LRUCache
-         when the hit ratio of the cache is high (> 75%), and may be
-         faster under other scenarios on multi-cpu systems.
-    -->
-    <!-- Filter Cache
-         Cache used by SolrIndexSearcher for filters (DocSets),
-         unordered sets of *all* documents that match a query.  When a
-         new searcher is opened, its caches may be prepopulated or
-         "autowarmed" using data from caches in the old searcher.
-         autowarmCount is the number of items to prepopulate.  For
-         LRUCache, the autowarmed items will be the most recently
-         accessed items.
-         Parameters:
-           class - the SolrCache implementation LRUCache or
-               (LRUCache or FastLRUCache)
-           size - the maximum number of entries in the cache
-           initialSize - the initial capacity (number of entries) of
-               the cache.  (see java.util.HashMap)
-           autowarmCount - the number of entries to prepopulate from
-               and old cache.  
-      -->
     <filterCache class="solr.FastLRUCache"
-                 size="512"
-                 initialSize="512"
-                 autowarmCount="0"/>
-    <!-- Query Result Cache
-         Caches results of searches - ordered lists of document ids
-         (DocList) based on a query, a sort, and the range of documents requested.  
-      -->
+                 size="2048"
+                 initialSize="1024"
+                 autowarmCount="512"/>
     <queryResultCache class="solr.LRUCache"
-                     size="512"
-                     initialSize="512"
-                     autowarmCount="0"/>
-    <!-- Document Cache
-         Caches Lucene Document objects (the stored fields for each
-         document).  Since Lucene internal document ids are transient,
-         this cache will not be autowarmed.  
-      -->
+                     size="2048"
+                     initialSize="1024"
+                     autowarmCount="512"/>
     <documentCache class="solr.LRUCache"
-                   size="512"
-                   initialSize="512"
+                   size="4096"
+                   initialSize="1024"
-    <!-- Field Value Cache
-         Cache used to hold field values that are quickly accessible
-         by document id.  The fieldValueCache is created by default
-         even if not configured here.
-      -->
        <fieldValueCache class="solr.FastLRUCache"
                         showItems="32" />
-    <!-- Custom Cache
-         Example of a generic cache.  These caches may be accessed by
-         name through SolrIndexSearcher.getCache(),cacheLookup(), and
-         cacheInsert().  The purpose is to enable easy caching of
-         user/application level data.  The regenerator argument should
-         be specified as an implementation of solr.CacheRegenerator 
-         if autowarming is desired.  
-      -->
-    <!--
-       <cache name="myUserCache"
-              class="solr.LRUCache"
-              size="4096"
-              initialSize="1024"
-              autowarmCount="1024"
-              regenerator="com.mycompany.MyRegenerator"
-              />
-      -->
-    <!-- Lazy Field Loading
-         If true, stored fields that are not requested will be loaded
-         lazily.  This can result in a significant speed improvement
-         if the usual case is to not load all stored fields,
-         especially if the skipped fields are large compressed text
-         fields.
-    -->
-   <!-- Use Filter For Sorted Query
-        A possible optimization that attempts to use a filter to
-        satisfy a search.  If the requested sort does not include
-        score, then the filterCache will be checked for a filter
-        matching the query. If found, the filter will be used as the
-        source of document ids, and then the sort will be applied to
-        that.
-        For most situations, this will not be useful unless you
-        frequently get the same search repeatedly with different sort
-        options, and none of them ever use "score"
-     -->
-   <!--
-      <useFilterForSortedQuery>true</useFilterForSortedQuery>
-     -->
    <!-- Result Window Size
         An optimization for use with the queryResultCache.  When a search
@@ -523,22 +126,6 @@
-   <!-- Query Related Event Listeners
-        Various IndexSearcher related events can trigger Listeners to
-        take actions.
-        newSearcher - fired whenever a new searcher is being prepared
-        and there is a current searcher handling requests (aka
-        registered).  It can be used to prime certain caches to
-        prevent long request times for certain requests.
-        firstSearcher - fired whenever a new searcher is being
-        prepared but there is no current registered searcher to handle
-        requests or to gain autowarming data from.
-     -->
     <!-- QuerySenderListener takes an array of NamedList and executes a
          local query request for each NamedList in sequence. 
@@ -580,233 +167,75 @@
-  <!-- Request Dispatcher
-       This section contains instructions for how the SolrDispatchFilter
-       should behave when processing requests for this SolrCore.
-       handleSelect affects the behavior of requests such as /select?qt=XXX
-       handleSelect="true" will cause the SolrDispatchFilter to process
-       the request and will result in consistent error handling and
-       formatting for all types of requests.
-       handleSelect="false" will cause the SolrDispatchFilter to
-       ignore "/select" requests and fallback to using the legacy
-       SolrServlet and it's Solr 1.1 style error formatting
-    -->
-  <requestDispatcher handleSelect="true" >
-    <!-- Request Parsing
-         These settings indicate how Solr Requests may be parsed, and
-         what restrictions may be placed on the ContentStreams from
-         those requests
-         enableRemoteStreaming - enables use of the stream.file
-         and stream.url parameters for specifying remote streams.
-         multipartUploadLimitInKB - specifies the max size of
-         Multipart File Uploads that Solr will allow in a Request.
-         *** WARNING ***
-         The settings below authorize Solr to fetch remote files, You
-         should make sure your system has some authentication before
-         using enableRemoteStreaming="true"
-      --> 
+  <requestDispatcher handleSelect="false" >
     <requestParsers enableRemoteStreaming="true" 
-                    multipartUploadLimitInKB="2048000" />
-    <!-- HTTP Caching
-         Set HTTP caching related parameters (for proxy caches and clients).
-         The options below instruct Solr not to output any HTTP Caching
-         related headers
-      -->
+                    multipartUploadLimitInKB="2048000"
+                    formdataUploadLimitInKB="2048"/>
     <httpCaching never304="true" />
-    <!-- If you include a <cacheControl> directive, it will be used to
-         generate a Cache-Control header (as well as an Expires header
-         if the value contains "max-age=")
-         By default, no Cache-Control header is generated.
-         You can use the <cacheControl> option even if you have set
-         never304="true"
-      -->
-    <!--
-       <httpCaching never304="true" >
-         <cacheControl>max-age=30, public</cacheControl> 
-       </httpCaching>
-      -->
-    <!-- To enable Solr to respond with automatically generated HTTP
-         Caching headers, and to response to Cache Validation requests
-         correctly, set the value of never304="false"
-         This will cause Solr to generate Last-Modified and ETag
-         headers based on the properties of the Index.
-         The following options can also be specified to affect the
-         values of these headers...
-         lastModFrom - the default value is "openTime" which means the
-         Last-Modified value (and validation against If-Modified-Since
-         requests) will all be relative to when the current Searcher
-         was opened.  You can change it to lastModFrom="dirLastMod" if
-         you want the value to exactly correspond to when the physical
-         index was last modified.
-         etagSeed="..." is an option you can change to force the ETag
-         header (and validation against If-None-Match requests) to be
-         different even if the index has not changed (ie: when making
-         significant changes to your config file)
-         (lastModifiedFrom and etagSeed are both ignored if you use
-         the never304="true" option)
-      -->
-    <!--
-       <httpCaching lastModifiedFrom="openTime"
-                    etagSeed="Solr">
-         <cacheControl>max-age=30, public</cacheControl> 
-       </httpCaching>
-      -->
-  <requestHandler name="/mlt" class="solr.MoreLikeThisHandler" startup="lazy" />
   <!-- Request Handlers 

-       incoming queries will be dispatched to the correct handler
-       based on the path or the qt (query type) param.
-       Names starting with a '/' are accessed with the a path equal to
-       the registered name.  Names without a leading '/' are accessed
-       with: http://host/app/[core/]select?qt=name
-       If a /select request is processed with out a qt param
-       specified, the requestHandler that declares default="true" will
-       be used.
-       If a Request Handler is declared with startup="lazy", then it will
-       not be initialized until the first request that uses it.
   <!-- SearchHandler

-       For processing Search Queries, the primary Request Handler
-       provided with Solr is "SearchHandler" It delegates to a sequent
-       of SearchComponents (see below) and supports distributed
-       queries across multiple shards
-  <requestHandler name="search" class="solr.SearchHandler" default="true">
+  <requestHandler name="/select" class="solr.SearchHandler">
     <!-- default values for query parameters can be specified, these
          will be overridden by parameters in the request
      <lst name="defaults">
        <str name="echoParams">explicit</str>
        <int name="rows">10</int>
-     </lst>
-    <!-- In addition to defaults, "appends" params can be specified
-         to identify values which should be appended to the list of
-         multi-val params from the query (or the existing "defaults").
-      -->
-    <!-- In this example, the param "fq=instock:true" would be appended to
-         any query time fq params the user may specify, as a mechanism for
-         partitioning the index, independent of any user selected filtering
-         that may also be desired (perhaps as a result of faceted searching).
-         NOTE: there is *absolutely* nothing a client can do to prevent these
-         "appends" values from being used, so don't use this mechanism
-         unless you are sure you always want it.
-      -->
-    <!--
-       <lst name="appends">
-         <str name="fq">inStock:true</str>
-       </lst>
-      -->
-    <!-- "invariants" are a way of letting the Solr maintainer lock down
-         the options available to Solr clients.  Any params values
-         specified here are used regardless of what values may be specified
-         in either the query, the "defaults", or the "appends" params.
-         In this example, the facet.field and facet.query params would
-         be fixed, limiting the facets clients can use.  Faceting is
-         not turned on by default - but if the client does specify
-         facet=true in the request, these are the only facets they
-         will be able to see counts for; regardless of what other
-         facet.field or facet.query params they may specify.
-         NOTE: there is *absolutely* nothing a client can do to prevent these
-         "invariants" values from being used, so don't use this mechanism
-         unless you are sure you always want it.
-      -->
-    <!--
-       <lst name="invariants">
-         <str name="facet.field">cat</str>
-         <str name="facet.field">manu_exact</str>
-         <str name="facet.query">price:[* TO 500]</str>
-         <str name="facet.query">price:[500 TO *]</str>
-       </lst>
-      -->
-    <!-- If the default list of SearchComponents is not desired, that
-         list can either be overridden completely, or components can be
-         prepended or appended to the default list.  (see below)
-      -->
-    <!--
-       <arr name="components">
-         <str>nameOfCustomComponent1</str>
-         <str>nameOfCustomComponent2</str>
-       </arr>
-      -->
+    </lst>
-  <!-- XML Update Request Handler.  
+  <!-- Request Handler for similarity queries and topic classification -->
+  <requestHandler name="/mlt" class="solr.MoreLikeThisHandler" startup="lazy" />
-       The canonical Request Handler for Modifying the Index through
-       commands specified using XML.
+  <!-- A request handler that returns indented JSON by default -->
+  <requestHandler name="/query" class="solr.SearchHandler">
+     <lst name="defaults">
+       <str name="echoParams">explicit</str>
+       <str name="wt">json</str>
+       <str name="indent">true</str>
+       <str name="df">text</str>
+     </lst>
+  </requestHandler>
-       Note: Since solr1.1 requestHandlers requires a valid content
-       type header if posted in the body. For example, curl now
-       requires: -H 'Content-type:text/xml; charset=utf-8'
-    -->
-  <requestHandler name="/update" 
-                  class="solr.XmlUpdateRequestHandler">
-    <!-- See below for information on defining 
-         updateRequestProcessorChains that can be used by name 
-         on each Update Request
-      -->
-    <!--
-       <lst name="defaults">
-         <str name="update.chain">dedupe</str>
-       </lst>
-       -->
-    </requestHandler>
-  <!-- Binary Update Request Handler
-    -->
-  <requestHandler name="/update/javabin" 
-                  class="solr.BinaryUpdateRequestHandler" />
-  <!-- CSV Update Request Handler
-    -->
-  <requestHandler name="/update/csv" 
-                  class="solr.CSVRequestHandler" 
-                  startup="lazy" />
+  <!-- realtime get handler, guaranteed to return the latest stored fields of
+       any document, without the need to commit or open a new searcher.  The
+       current implementation relies on the updateLog feature being enabled. -->
+  <requestHandler name="/get" class="solr.RealTimeGetHandler">
+     <lst name="defaults">
+       <str name="omitHeader">true</str>
+       <str name="wt">json</str>
+       <str name="indent">true</str>
+     </lst>
+  </requestHandler>
+  <!-- Update Request Handler.  
-  <!-- JSON Update Request Handler
-  <requestHandler name="/update/json" 
-                  class="solr.JsonUpdateRequestHandler" 
-                  startup="lazy" />
+  <requestHandler name="/update" class="solr.UpdateRequestHandler" />
+  <!-- for back compat with clients using /update/json and /update/csv -->  
+  <requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler">
+        <lst name="defaults">
+         <str name="stream.contentType">application/json</str>
+       </lst>
+  </requestHandler>
+  <requestHandler name="/update/csv" class="solr.CSVRequestHandler">
+        <lst name="defaults">
+         <str name="stream.contentType">application/csv</str>
+       </lst>
+  </requestHandler>
   <!-- Solr Cell Update Request Handler
@@ -817,9 +246,6 @@
                   class="solr.extraction.ExtractingRequestHandler" >
     <lst name="defaults">
-      <!-- All the main content goes into "text"... if you need to return
-           the extracted text or do highlighting, use a stored field. -->
-      <str name="fmap.content">text</str>
       <str name="lowernames">true</str>
       <str name="uprefix">ignored_</str>
@@ -830,6 +256,7 @@
   <!-- Field Analysis Request Handler
        RequestHandler that provides much the same functionality as
@@ -858,7 +285,7 @@
        An analysis handler that provides a breakdown of the analysis
-       process of provided docuemnts. This handler expects a (single)
+       process of provided documents. This handler expects a (single)
        content stream with the following format:
@@ -897,11 +324,18 @@
   <!-- ping/healthcheck -->
   <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
-    <lst name="defaults">
-      <str name="qt">search</str>
+    <lst name="invariants">
       <str name="q">solrpingquery</str>
+    </lst>
+    <lst name="defaults">
       <str name="echoParams">all</str>
+    <!-- An optional feature of the PingRequestHandler is to configure the 
+         handler with a "healthcheckFile" which can be used to enable/disable 
+         the PingRequestHandler.
+         relative paths are resolved against the data dir 
+      -->
+    <!-- <str name="healthcheckFile">server-enabled.txt</str> -->
   <!-- Echo the request contents back to the client -->
@@ -911,18 +345,347 @@
      <str name="echoHandler">true</str>
+  <!-- Solr Replication
+       The SolrReplicationHandler supports replicating indexes from a
+       "master" used for indexing and "slaves" used for queries.
-  <!-- Legacy config for the admin interface -->
-  <admin>
-    <defaultQuery>*:*</defaultQuery>
-    <!-- configure a healthcheck file for servers behind a
-         loadbalancer 
+       It is also necessary for SolrCloud to function (in Cloud mode, the 
+       replication handler is used to bulk transfer segments when nodes 
+       are added or need to recover).
+    -->
+  <requestHandler name="/replication" class="solr.ReplicationHandler" > 
+    <!--
+       To enable simple master/slave replication, uncomment one of the 
+       sections below, depending on whether this solr instance should be 
+       the "master" or a "slave".  If this instance is a "slave" you will 
+       also need to fill in the masterUrl to point to a real machine.
+    -->
+    <!--
+       <lst name="master">
+         <str name="replicateAfter">commit</str>
+         <str name="replicateAfter">startup</str>
+         <str name="confFiles">schema.xml,stopwords.txt</str>
+       </lst>
+    -->
+    <!--
+       <lst name="slave">
+         <str name="masterUrl">http://your-master-hostname:8983/solr</str>
+         <str name="pollInterval">00:00:60</str>
+       </lst>
+    -->
+  </requestHandler>
+   <!-- Spell Check
+        The spell check component can return a list of alternative spelling
+        suggestions.  
+     -->
+  <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
+    <str name="queryAnalyzerFieldType">textSpell</str>
+    <!-- Multiple "Spell Checkers" can be declared and used by this
+         component
+      -->
+    <!-- a spellchecker built from a field of the main index -->
+    <lst name="spellchecker">
+      <str name="name">default</str>
+      <str name="field">name</str>
+      <str name="classname">solr.DirectSolrSpellChecker</str>
+      <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
+      <str name="distanceMeasure">internal</str>
+      <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
+      <float name="accuracy">0.5</float>
+      <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
+      <int name="maxEdits">2</int>
+      <!-- the minimum shared prefix when enumerating terms -->
+      <int name="minPrefix">1</int>
+      <!-- maximum number of inspections per result. -->
+      <int name="maxInspections">5</int>
+      <!-- minimum length of a query term to be considered for correction -->
+      <int name="minQueryLength">4</int>
+      <!-- maximum threshold of documents a query term can appear to be considered for correction -->
+      <float name="maxQueryFrequency">0.01</float>
+      <!-- uncomment this to require suggestions to occur in 1% of the documents
+        <float name="thresholdTokenFrequency">.01</float>
+      -->
+    </lst>
+    <!-- a spellchecker that can break or combine words.  See "/spell" handler below for usage -->
+    <lst name="spellchecker">
+      <str name="name">wordbreak</str>
+      <str name="classname">solr.WordBreakSolrSpellChecker</str>      
+      <str name="field">name</str>
+      <str name="combineWords">true</str>
+      <str name="breakWords">true</str>
+      <int name="maxChanges">10</int>
+    </lst>
+    <!-- a spellchecker that uses a different distance measure -->
-       <healthcheck type="file">server-enabled</healthcheck>
+    <!--
+       <lst name="spellchecker">
+         <str name="name">jarowinkler</str>
+         <str name="field">spell</str>
+         <str name="classname">solr.DirectSolrSpellChecker</str>
+         <str name="distanceMeasure">
+         </str>
+       </lst>
+     -->
+    <!-- a spellchecker that uses an alternate comparator 
+         comparatorClass can be one of:
+          1. score (default)
+          2. freq (Frequency first, then score)
+          3. A fully qualified class name
+      -->
+    <!--
+       <lst name="spellchecker">
+         <str name="name">freq</str>
+         <str name="field">lowerfilt</str>
+         <str name="classname">solr.DirectSolrSpellChecker</str>
+         <str name="comparatorClass">freq</str>
+      -->
+    <!-- A spellchecker that reads the list of words from a file -->
+    <!--
+       <lst name="spellchecker">
+         <str name="classname">solr.FileBasedSpellChecker</str>
+         <str name="name">file</str>
+         <str name="sourceLocation">spellings.txt</str>
+         <str name="characterEncoding">UTF-8</str>
+         <str name="spellcheckIndexDir">spellcheckerFile</str>
+       </lst>
+    -->
+  </searchComponent>
+  <!-- A request handler for demonstrating the spellcheck component.  
+       NOTE: This is purely as an example.  The whole purpose of the
+       SpellCheckComponent is to hook it into the request handler that
+       handles your normal user queries so that a separate request is
+       not needed to get suggestions.
+       See http://wiki.apache.org/solr/SpellCheckComponent for details
+       on the request parameters.
+    -->
+  <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
+    <lst name="defaults">
+      <str name="df">text</str>
+      <!-- Solr will use suggestions from both the 'default' spellchecker
+           and from the 'wordbreak' spellchecker and combine them.
+           collations (re-written queries) can include a combination of
+           corrections from both spellcheckers -->
+      <str name="spellcheck.dictionary">default</str>
+      <str name="spellcheck.dictionary">wordbreak</str>
+      <str name="spellcheck">on</str>
+      <str name="spellcheck.extendedResults">true</str>       
+      <str name="spellcheck.count">10</str>
+      <str name="spellcheck.alternativeTermCount">5</str>
+      <str name="spellcheck.maxResultsForSuggest">5</str>       
+      <str name="spellcheck.collate">true</str>
+      <str name="spellcheck.collateExtendedResults">true</str>  
+      <str name="spellcheck.maxCollationTries">10</str>
+      <str name="spellcheck.maxCollations">5</str>         
+    </lst>
+    <arr name="last-components">
+      <str>spellcheck</str>
+    </arr>
+  </requestHandler>
+  <!-- Term Vector Component
+    -->
+  <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
+  <!-- Clustering Component
+       You'll need to set the solr.clustering.enabled system property
+       when running solr to run with clustering enabled:
+            java -Dsolr.clustering.enabled=true -jar start.jar
+    -->
+  <searchComponent name="clustering"
+                   enable="${solr.clustering.enabled:false}"
+                   class="solr.clustering.ClusteringComponent" >
+    <!-- Declare an engine -->
+    <lst name="engine">
+      <!-- The name, only one can be named "default" -->
+      <str name="name">default</str>
+      <!-- Class name of Carrot2 clustering algorithm.
+           Currently available algorithms are:
+           * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
+           *
+           * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
+           See http://project.carrot2.org/algorithms.html for the
+           algorithm's characteristics.
+        -->
+      <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
+      <!-- Overriding values for Carrot2 default algorithm attributes.
+           For a description of all available attributes, see:
+           Use attribute key as name attribute of str elements
+           below. These can be further overridden for individual
+           requests by specifying attribute key as request parameter
+           name and attribute value as parameter value.
+        -->
+      <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
+      <!-- Location of Carrot2 lexical resources.
+           A directory from which to load Carrot2-specific stop words
+           and stop labels. Absolute or relative to Solr config directory.
+           If a specific resource (e.g. stopwords.en) is present in the
+           specified dir, it will completely override the corresponding
+           default one that ships with Carrot2.
+           For an overview of Carrot2 lexical resources, see:
+        -->
+      <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
+      <!-- The language to assume for the documents.
+           For a list of allowed values, see:
+       -->
+      <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
+    </lst>
+    <lst name="engine">
+      <str name="name">stc</str>
+      <str name="carrot.algorithm"></str>
+    </lst>
+  </searchComponent>
+  <!-- A request handler for demonstrating the clustering component
+       This is purely as an example.
+       In reality you will likely want to add the component to your 
+       already specified request handlers. 
+    -->
+  <requestHandler name="/clustering"
+                  startup="lazy"
+                  enable="${solr.clustering.enabled:false}"
+                  class="solr.SearchHandler">
+    <lst name="defaults">
+      <bool name="clustering">true</bool>
+      <str name="clustering.engine">default</str>
+      <bool name="clustering.results">true</bool>
+      <!-- The title field -->
+      <str name="carrot.title">name</str>
+      <str name="carrot.url">id</str>
+      <!-- The field to cluster on -->
+       <str name="carrot.snippet">features</str>
+       <!-- produce summaries -->
+       <bool name="carrot.produceSummary">true</bool>
+       <!-- the maximum number of labels per cluster -->
+       <!--<int name="carrot.numDescriptions">5</int>-->
+       <!-- produce sub clusters -->
+       <bool name="carrot.outputSubClusters">false</bool>
+       <str name="defType">edismax</str>
+       <str name="qf">
+         text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+       </str>
+       <str name="q.alt">*:*</str>
+       <str name="rows">10</str>
+       <str name="fl">*,score</str>
+    </lst>     
+    <arr name="last-components">
+      <str>clustering</str>
+    </arr>
+  </requestHandler>
+  <!-- Terms Component
+       A component to return terms and document frequency of those
+       terms
+    -->
+  <searchComponent name="terms" class="solr.TermsComponent"/>
+  <!-- A request handler for demonstrating the terms component -->
+  <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
+     <lst name="defaults">
+      <bool name="terms">true</bool>
+      <bool name="distrib">false</bool>
+    </lst>     
+    <arr name="components">
+      <str>terms</str>
+    </arr>
+  </requestHandler>
+  <!-- Update Processors
+       Chains of Update Processor Factories for dealing with Update
+       Requests can be declared, and then used by name in Update
+       Request Processors
+    --> 
+  <queryResponseWriter name="json" class="solr.JSONResponseWriter">
+     <!-- For the purposes of the tutorial, JSON responses are written as
+      plain text so that they are easy to read in *any* browser.
+      If you expect a MIME type of "application/json" just remove this override.
+     -->
+    <str name="content-type">text/plain; charset=UTF-8</str>
+  </queryResponseWriter>
+  <!--
+     Custom response writers can be declared as needed...
+    -->
+    <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/>
+  <!-- XSLT response writer transforms the XML output by any xslt file found
+       in Solr's conf/xslt directory.  Changes to xslt files are checked for
+       every xsltCacheLifetimeSeconds.  
+    -->
+  <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
+    <int name="xsltCacheLifetimeSeconds">5</int>
+  </queryResponseWriter>
+  <!-- Query Parsers
+       Multiple QParserPlugins can be registered by name, and then
+       used in either the "defType" param for the QueryComponent (used
+       by SearchHandler) or in LocalParams
+    -->
+  <!-- Legacy config for the admin interface -->
+  <admin>
+    <defaultQuery>*:*</defaultQuery>

Added: stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/shingle-topic-model/conf/schema.xml
--- stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/shingle-topic-model/conf/schema.xml (added)
+++ stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/shingle-topic-model/conf/schema.xml Wed Jun  5 07:23:15 2013
@@ -0,0 +1,107 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+  license agreements. See the NOTICE file distributed with this work for additional 
+  information regarding copyright ownership. The ASF licenses this file to 
+  You under the Apache License, Version 2.0 (the "License"); you may not use 
+  this file except in compliance with the License. You may obtain a copy of 
+  the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+  by applicable law or agreed to in writing, software distributed under the 
+  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+  OF ANY KIND, either express or implied. See the License for the specific 
+  language governing permissions and limitations under the License. -->
+<schema name="shingle-topic-model" version="1.3">
+  <types>
+    <fieldType name="uuid" class="solr.UUIDField" indexed="true" />
+    <fieldType name="string" class="solr.StrField"
+      sortMissingLast="true" omitNorms="true" />
+    <fieldType name="tint" class="solr.TrieIntField"
+      precisionStep="0" omitNorms="true" positionIncrementGap="0" />
+    <fieldType name="tfloat" class="solr.TrieFloatField"
+      precisionStep="0" omitNorms="true" positionIncrementGap="0" />
+    <fieldType name="tdate" class="solr.TrieDateField"
+      omitNorms="true" precisionStep="6" positionIncrementGap="0" />
+    <fieldType name="random" class="solr.RandomSortField"
+      indexed="true" />
+    <fieldType name="text" class="solr.TextField">
+      <analyzer type="index">
+        <tokenizer class="solr.StandardTokenizerFactory" />
+        <!-- <filter class="solr.StopFilterFactory" ignoreCase="true" -->
+        <!-- words="stopwords_en.txt" enablePositionIncrements="false" /> -->
+        <filter class="solr.LowerCaseFilterFactory" />
+        <!-- Shingles help improve the quality but they increase 
+          the size of the index -->
+          <filter class="solr.ShingleFilterFactory" maxShingleSize="2" outputUnigrams="true"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.StandardTokenizerFactory" />
+        <!-- <filter class="solr.StopFilterFactory" ignoreCase="true" -->
+        <!-- words="stopwords_en.txt" enablePositionIncrements="false" /> -->
+        <!-- <filter class="solr.SynonymFilterFactory" -->
+        <!-- synonyms="synonyms.txt" ignoreCase="true" expand="true"/> -->
+        <filter class="solr.LowerCaseFilterFactory" />
+        <filter class="solr.ShingleFilterFactory" maxShingleSize="2" 
+          outputUnigrams="false" outputUnigramIfNoNgram="true"/>
+      </analyzer>
+    </fieldType>
+  </types>
+  <fields>
+    <!-- Physical (automated) primary key. Each topic is stored into 2 Solr 
+      entries to be able to handle the partial update of stored attributes such 
+      as estimation of the predictive accuracy and broader topic links while preserving 
+      the previous version of the statistical model -->
+    <field name="entry_id" type="string" indexed="true" stored="true"
+      required="true" />
+    <!-- Mandatory field for all entries: this is the logical primary key -->
+    <field name="concept" type="string" indexed="true" stored="true"
+      required="true" />
+    <!-- entry_type can be either 'model' or 'metadata' -->
+    <field name="entry_type" type="string" indexed="true" stored="true"
+      required="true" />
+    <!-- Mandatory classifier model attribute when entry_type == 'model' -->
+    <field name="classifier_features" type="text" indexed="true"
+      stored="false" termVectors="true" termPositions="false"
+      termOffsets="false" />
+    <!-- Classifier model stored attributes when entry_type == 'metadata' -->
+    <field name="model_entry_id" type="string" indexed="true"
+      stored="true" />
+    <field name="primary_topic" type="string" indexed="true" stored="true" />
+    <field name="broader" type="string" indexed="true" stored="true"
+      multiValued="true" />
+    <field name="last_update_dt" type="tdate" indexed="true"
+      stored="true" />
+    <!-- Accuracy evaluation of the model (across CV folds) -->
+    <field name="precision" type="tfloat" indexed="true" stored="true"
+      multiValued="true" />
+    <field name="recall" type="tfloat" indexed="true" stored="true"
+      multiValued="true" />
+    <field name="last_evaluation_dt" type="tdate" indexed="true"
+      stored="true" />
+    <field name="positive_support" type="tint" indexed="false"
+      stored="true" multiValued="true" />
+    <field name="negative_support" type="tint" indexed="false"
+      stored="true" multiValued="true" />
+    <!-- Store ids of some false positive and negative examples (accumulated 
+      over several CV folds) -->
+    <field name="false_positives" type="string" indexed="false"
+      multiValued="true" stored="true" />
+    <field name="false_negatives" type="string" indexed="false"
+      multiValued="true" stored="true" />
+  </fields>
+  <uniqueKey>entry_id</uniqueKey>
+  <defaultSearchField>classifier_features</defaultSearchField>
+  <solrQueryParser defaultOperator="AND" />

Added: stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/shingle-topic-model/conf/solrconfig.xml
--- stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/shingle-topic-model/conf/solrconfig.xml (added)
+++ stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/shingle-topic-model/conf/solrconfig.xml Wed Jun  5 07:23:15 2013
@@ -0,0 +1,691 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+     http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!--
+     For more details about configuration options that may appear in
+     this file, see http://wiki.apache.org/solr/SolrConfigXml.
+-->
+  <!-- In all configuration below, a prefix of "solr." for class names
+       is an alias that causes solr to search appropriate packages,
+       including org.apache.solr.(search|update|request|core|analysis)
+       You may also specify a fully qualified Java classname if you
+       have your own custom plugins.
+    -->
+  <!-- Set this to 'false' if you want solr to continue working after
+       it has encountered a severe configuration error.  In a
+       production environment, you may want solr to keep working even
+       if one handler is mis-configured.
+       You may also set this to false by setting the system
+       property:
+         -Dsolr.abortOnConfigurationError=false
+    -->
+  <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
+  <!-- Controls what version of Lucene various components of Solr
+       adhere to.  Generally, you want to use the latest version to
+       get all bug fixes and improvements. It is highly recommended
+       that you fully re-index after changing this setting as it can
+       affect both how text is indexed and queried.
+    -->
+  <luceneMatchVersion>LUCENE_42</luceneMatchVersion>
+  <dataDir>${}</dataDir>
+  <directoryFactory name="DirectoryFactory" 
+                    class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> 
+  <indexConfig>
+    <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a 
+         LimitTokenCountFilterFactory in your fieldType definition. E.g. 
+     <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
+    -->
+    <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
+    <writeLockTimeout>5000</writeLockTimeout>
+ </indexConfig>
+ <jmx />
+   <!-- The default high-performance update handler -->
+  <updateHandler class="solr.DirectUpdateHandler2">
+    <!-- Deactivate transaction log 
+    <updateLog>
+      <str name="dir">${solr.ulog.dir:}</str>
+    </updateLog > -->
+    <!-- no auto commit
+     <autoCommit> 
+       <maxTime>15000</maxTime> 
+       <openSearcher>false</openSearcher> 
+     </autoCommit>
+     -->
+     <!--
+       <autoSoftCommit> 
+         <maxTime>1000</maxTime> 
+       </autoSoftCommit>
+      -->
+  </updateHandler>
+  <query>
+    <maxBooleanClauses>1024</maxBooleanClauses>
+    <filterCache class="solr.FastLRUCache"
+                 size="2048"
+                 initialSize="1024"
+                 autowarmCount="512"/>
+    <queryResultCache class="solr.LRUCache"
+                     size="2048"
+                     initialSize="1024"
+                     autowarmCount="512"/>
+    <documentCache class="solr.LRUCache"
+                   size="4096"
+                   initialSize="1024"
+                   autowarmCount="0"/>
+    <!--
+       <fieldValueCache class="solr.FastLRUCache"
+                        size="512"
+                        autowarmCount="128"
+                        showItems="32" />
+      -->
+    <enableLazyFieldLoading>true</enableLazyFieldLoading>
+   <!-- Result Window Size
+        An optimization for use with the queryResultCache.  When a search
+        is requested, a superset of the requested number of document ids
+        are collected.  For example, if a search for a particular query
+        requests matching documents 10 through 19, and queryWindowSize is 50,
+        then documents 0 through 49 will be collected and cached.  Any further
+        requests in that range can be satisfied via the cache.  
+     -->
+   <queryResultWindowSize>20</queryResultWindowSize>
+   <!-- Maximum number of documents to cache for any entry in the
+        queryResultCache. 
+     -->
+   <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
+    <!-- QuerySenderListener takes an array of NamedList and executes a
+         local query request for each NamedList in sequence. 
+      -->
+    <listener event="newSearcher" class="solr.QuerySenderListener">
+      <arr name="queries">
+        <!--
+           <lst><str name="q">solr</str><str name="sort">price asc</str></lst>
+           <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst>
+          -->
+      </arr>
+    </listener>
+    <listener event="firstSearcher" class="solr.QuerySenderListener">
+      <arr name="queries">
+        <lst>
+          <str name="q">static firstSearcher warming in solrconfig.xml</str>
+        </lst>
+      </arr>
+    </listener>
+    <!-- Use Cold Searcher
+         If a search request comes in and there is no current
+         registered searcher, then immediately register the still
+         warming searcher and use it.  If "false" then all requests
+         will block until the first searcher is done warming.
+      -->
+    <useColdSearcher>false</useColdSearcher>
+    <!-- Max Warming Searchers
+         Maximum number of searchers that may be warming in the
+         background concurrently.  An error is returned if this limit
+         is exceeded.
+         Recommend values of 1-2 for read-only slaves, higher for
+         masters w/o cache warming.
+      -->
+    <maxWarmingSearchers>2</maxWarmingSearchers>
+  </query>
+  <requestDispatcher handleSelect="false" >
+    <requestParsers enableRemoteStreaming="true" 
+                    multipartUploadLimitInKB="2048000"
+                    formdataUploadLimitInKB="2048"/>
+    <httpCaching never304="true" />
+  </requestDispatcher>
+  <!-- Request Handlers 
+    -->
+  <!-- SearchHandler
+    -->
+  <requestHandler name="/select" class="solr.SearchHandler">
+    <!-- default values for query parameters can be specified, these
+         will be overridden by parameters in the request
+      -->
+     <lst name="defaults">
+       <str name="echoParams">explicit</str>
+       <int name="rows">10</int>
+    </lst>
+    </requestHandler>
+  <!-- Request Handler for similarity queries and topic classification -->
+  <requestHandler name="/mlt" class="solr.MoreLikeThisHandler" startup="lazy" />
+  <!-- A request handler that returns indented JSON by default -->
+  <requestHandler name="/query" class="solr.SearchHandler">
+     <lst name="defaults">
+       <str name="echoParams">explicit</str>
+       <str name="wt">json</str>
+       <str name="indent">true</str>
+       <str name="df">text</str>
+     </lst>
+  </requestHandler>
+  <!-- realtime get handler, guaranteed to return the latest stored fields of
+       any document, without the need to commit or open a new searcher.  The
+       current implementation relies on the updateLog feature being enabled. -->
+  <requestHandler name="/get" class="solr.RealTimeGetHandler">
+     <lst name="defaults">
+       <str name="omitHeader">true</str>
+       <str name="wt">json</str>
+       <str name="indent">true</str>
+     </lst>
+  </requestHandler>
+  <!-- Update Request Handler.  
+    -->
+  <requestHandler name="/update" class="solr.UpdateRequestHandler" />
+  <!-- for back compat with clients using /update/json and /update/csv -->  
+  <requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler">
+        <lst name="defaults">
+         <str name="stream.contentType">application/json</str>
+       </lst>
+  </requestHandler>
+  <requestHandler name="/update/csv" class="solr.CSVRequestHandler">
+        <lst name="defaults">
+         <str name="stream.contentType">application/csv</str>
+       </lst>
+  </requestHandler>
+  <!-- Solr Cell Update Request Handler
+    -->
+  <requestHandler name="/update/extract" 
+                  startup="lazy"
+                  class="solr.extraction.ExtractingRequestHandler" >
+    <lst name="defaults">
+      <str name="lowernames">true</str>
+      <str name="uprefix">ignored_</str>
+      <!-- capture link hrefs but ignore div attributes -->
+      <str name="captureAttr">true</str>
+      <str name="fmap.a">links</str>
+      <str name="fmap.div">ignored_</str>
+    </lst>
+  </requestHandler>
+  <!-- Field Analysis Request Handler
+       RequestHandler that provides much the same functionality as
+       analysis.jsp. Provides the ability to specify multiple field
+       types and field names in the same request and outputs
+       index-time and query-time analysis for each of them.
+       Request parameters are:
+       analysis.fieldname - field name whose analyzers are to be used
+       analysis.fieldtype - field type whose analyzers are to be used
+       analysis.fieldvalue - text for index-time analysis
+       q (or analysis.q) - text for query time analysis
+       analysis.showmatch (true|false) - When set to true and when
+           query analysis is performed, the produced tokens of the
+           field value analysis will be marked as "matched" for every
+           token that is produced by the query analysis
+   -->
+  <requestHandler name="/analysis/field" 
+                  startup="lazy"
+                  class="solr.FieldAnalysisRequestHandler" />
+  <!-- Document Analysis Handler
+       An analysis handler that provides a breakdown of the analysis
+       process of provided documents. This handler expects a (single)
+       content stream with the following format:
+       <docs>
+         <doc>
+           <field name="id">1</field>
+           <field name="name">The Name</field>
+           <field name="text">The Text Value</field>
+         </doc>
+         <doc>...</doc>
+         <doc>...</doc>
+         ...
+       </docs>
+    Note: Each document must contain a field which serves as the
+    unique key. This key is used in the returned response to associate
+    an analysis breakdown to the analyzed document.
+    Like the FieldAnalysisRequestHandler, this handler also supports
+    query analysis by sending either an "analysis.query" or "q"
+    request parameter that holds the query text to be analyzed. It
+    also supports the "analysis.showmatch" parameter which when set to
+    true, all field tokens that match the query tokens will be marked
+    as a "match". 
+  -->
+  <requestHandler name="/analysis/document" 
+                  class="solr.DocumentAnalysisRequestHandler" 
+                  startup="lazy" />
+  <!-- Admin Handlers
+       Admin Handlers - This will register all the standard admin
+       RequestHandlers.  
+    -->
+  <requestHandler name="/admin/" 
+                  class="solr.admin.AdminHandlers" />
+  <!-- ping/healthcheck -->
+  <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
+    <lst name="invariants">
+      <str name="q">solrpingquery</str>
+    </lst>
+    <lst name="defaults">
+      <str name="echoParams">all</str>
+    </lst>
+    <!-- An optional feature of the PingRequestHandler is to configure the 
+         handler with a "healthcheckFile" which can be used to enable/disable 
+         the PingRequestHandler.
+         relative paths are resolved against the data dir 
+      -->
+    <!-- <str name="healthcheckFile">server-enabled.txt</str> -->
+  </requestHandler>
+  <!-- Echo the request contents back to the client -->
+  <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
+    <lst name="defaults">
+     <str name="echoParams">explicit</str> 
+     <str name="echoHandler">true</str>
+    </lst>
+  </requestHandler>
+  <!-- Solr Replication
+       The SolrReplicationHandler supports replicating indexes from a
+       "master" used for indexing and "slaves" used for queries.
+       It is also necessary for SolrCloud to function (in Cloud mode, the 
+       replication handler is used to bulk transfer segments when nodes 
+       are added or need to recover).
+    -->
+  <requestHandler name="/replication" class="solr.ReplicationHandler" > 
+    <!--
+       To enable simple master/slave replication, uncomment one of the 
+       sections below, depending on whether this solr instance should be 
+       the "master" or a "slave".  If this instance is a "slave" you will 
+       also need to fill in the masterUrl to point to a real machine.
+    -->
+    <!--
+       <lst name="master">
+         <str name="replicateAfter">commit</str>
+         <str name="replicateAfter">startup</str>
+         <str name="confFiles">schema.xml,stopwords.txt</str>
+       </lst>
+    -->
+    <!--
+       <lst name="slave">
+         <str name="masterUrl">http://your-master-hostname:8983/solr</str>
+         <str name="pollInterval">00:00:60</str>
+       </lst>
+    -->
+  </requestHandler>
+   <!-- Spell Check
+        The spell check component can return a list of alternative spelling
+        suggestions.  
+     -->
+  <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
+    <str name="queryAnalyzerFieldType">textSpell</str>
+    <!-- Multiple "Spell Checkers" can be declared and used by this
+         component
+      -->
+    <!-- a spellchecker built from a field of the main index -->
+    <lst name="spellchecker">
+      <str name="name">default</str>
+      <str name="field">name</str>
+      <str name="classname">solr.DirectSolrSpellChecker</str>
+      <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
+      <str name="distanceMeasure">internal</str>
+      <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
+      <float name="accuracy">0.5</float>
+      <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
+      <int name="maxEdits">2</int>
+      <!-- the minimum shared prefix when enumerating terms -->
+      <int name="minPrefix">1</int>
+      <!-- maximum number of inspections per result. -->
+      <int name="maxInspections">5</int>
+      <!-- minimum length of a query term to be considered for correction -->
+      <int name="minQueryLength">4</int>
+      <!-- maximum threshold of documents a query term can appear in to be considered for correction -->
+      <float name="maxQueryFrequency">0.01</float>
+      <!-- uncomment this to require suggestions to occur in 1% of the documents
+        <float name="thresholdTokenFrequency">.01</float>
+      -->
+    </lst>
+    <!-- a spellchecker that can break or combine words.  See "/spell" handler below for usage -->
+    <lst name="spellchecker">
+      <str name="name">wordbreak</str>
+      <str name="classname">solr.WordBreakSolrSpellChecker</str>      
+      <str name="field">name</str>
+      <str name="combineWords">true</str>
+      <str name="breakWords">true</str>
+      <int name="maxChanges">10</int>
+    </lst>
+    <!-- a spellchecker that uses a different distance measure -->
+    <!--
+       <lst name="spellchecker">
+         <str name="name">jarowinkler</str>
+         <str name="field">spell</str>
+         <str name="classname">solr.DirectSolrSpellChecker</str>
+         <str name="distanceMeasure">
+         </str>
+       </lst>
+     -->
+    <!-- a spellchecker that uses an alternate comparator 
+         comparatorClass can be one of:
+          1. score (default)
+          2. freq (Frequency first, then score)
+          3. A fully qualified class name
+      -->
+    <!--
+       <lst name="spellchecker">
+         <str name="name">freq</str>
+         <str name="field">lowerfilt</str>
+         <str name="classname">solr.DirectSolrSpellChecker</str>
+         <str name="comparatorClass">freq</str>
+      -->
+    <!-- A spellchecker that reads the list of words from a file -->
+    <!--
+       <lst name="spellchecker">
+         <str name="classname">solr.FileBasedSpellChecker</str>
+         <str name="name">file</str>
+         <str name="sourceLocation">spellings.txt</str>
+         <str name="characterEncoding">UTF-8</str>
+         <str name="spellcheckIndexDir">spellcheckerFile</str>
+       </lst>
+      -->
+  </searchComponent>
+  <!-- A request handler for demonstrating the spellcheck component.  
+       NOTE: This is purely as an example.  The whole purpose of the
+       SpellCheckComponent is to hook it into the request handler that
+       handles your normal user queries so that a separate request is
+       not needed to get suggestions.
+       See the Solr SpellCheckComponent documentation for details
+       on the request parameters.
+    -->
+  <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
+    <lst name="defaults">
+      <str name="df">text</str>
+      <!-- Solr will use suggestions from both the 'default' spellchecker
+           and from the 'wordbreak' spellchecker and combine them.
+           collations (re-written queries) can include a combination of
+           corrections from both spellcheckers -->
+      <str name="spellcheck.dictionary">default</str>
+      <str name="spellcheck.dictionary">wordbreak</str>
+      <str name="spellcheck">on</str>
+      <str name="spellcheck.extendedResults">true</str>       
+      <str name="spellcheck.count">10</str>
+      <str name="spellcheck.alternativeTermCount">5</str>
+      <str name="spellcheck.maxResultsForSuggest">5</str>       
+      <str name="spellcheck.collate">true</str>
+      <str name="spellcheck.collateExtendedResults">true</str>  
+      <str name="spellcheck.maxCollationTries">10</str>
+      <str name="spellcheck.maxCollations">5</str>         
+    </lst>
+    <arr name="last-components">
+      <str>spellcheck</str>
+    </arr>
+  </requestHandler>
+  <!-- Term Vector Component
+    -->
+  <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
+  <!-- Clustering Component
+       You'll need to set the solr.clustering.enabled system property
+       when running solr to run with clustering enabled:
+            java -Dsolr.clustering.enabled=true -jar start.jar
+    -->
+  <searchComponent name="clustering"
+                   enable="${solr.clustering.enabled:false}"
+                   class="solr.clustering.ClusteringComponent" >
+    <!-- Declare an engine -->
+    <lst name="engine">
+      <!-- The name, only one can be named "default" -->
+      <str name="name">default</str>
+      <!-- Class name of Carrot2 clustering algorithm.
+           Currently available algorithms are:
+           * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
+           *
+           * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
+           See the Carrot2 documentation for the
+           algorithm's characteristics.
+        -->
+      <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
+      <!-- Overriding values for Carrot2 default algorithm attributes.
+           For a description of all available attributes, see the Carrot2 manual.
+           Use attribute key as name attribute of str elements
+           below. These can be further overridden for individual
+           requests by specifying attribute key as request parameter
+           name and attribute value as parameter value.
+        -->
+      <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
+      <!-- Location of Carrot2 lexical resources.
+           A directory from which to load Carrot2-specific stop words
+           and stop labels. Absolute or relative to Solr config directory.
+           If a specific resource (e.g. stopwords.en) is present in the
+           specified dir, it will completely override the corresponding
+           default one that ships with Carrot2.
+           For an overview of Carrot2 lexical resources, see the Carrot2 manual.
+        -->
+      <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
+      <!-- The language to assume for the documents.
+           For a list of allowed values, see the Carrot2 manual.
+       -->
+      <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
+    </lst>
+    <lst name="engine">
+      <str name="name">stc</str>
+      <str name="carrot.algorithm"></str>
+    </lst>
+  </searchComponent>
+  <!-- A request handler for demonstrating the clustering component
+       This is purely as an example.
+       In reality you will likely want to add the component to your 
+       already specified request handlers. 
+    -->
+  <requestHandler name="/clustering"
+                  startup="lazy"
+                  enable="${solr.clustering.enabled:false}"
+                  class="solr.SearchHandler">
+    <lst name="defaults">
+      <bool name="clustering">true</bool>
+      <str name="clustering.engine">default</str>
+      <bool name="clustering.results">true</bool>
+      <!-- The title field -->
+      <str name="carrot.title">name</str>
+      <str name="carrot.url">id</str>
+      <!-- The field to cluster on -->
+       <str name="carrot.snippet">features</str>
+       <!-- produce summaries -->
+       <bool name="carrot.produceSummary">true</bool>
+       <!-- the maximum number of labels per cluster -->
+       <!--<int name="carrot.numDescriptions">5</int>-->
+       <!-- produce sub clusters -->
+       <bool name="carrot.outputSubClusters">false</bool>
+       <str name="defType">edismax</str>
+       <str name="qf">
+         text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+       </str>
+       <str name="q.alt">*:*</str>
+       <str name="rows">10</str>
+       <str name="fl">*,score</str>
+    </lst>     
+    <arr name="last-components">
+      <str>clustering</str>
+    </arr>
+  </requestHandler>
+  <!-- Terms Component
+       A component to return terms and document frequency of those
+       terms
+    -->
+  <searchComponent name="terms" class="solr.TermsComponent"/>
+  <!-- A request handler for demonstrating the terms component -->
+  <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
+     <lst name="defaults">
+      <bool name="terms">true</bool>
+      <bool name="distrib">false</bool>
+    </lst>     
+    <arr name="components">
+      <str>terms</str>
+    </arr>
+  </requestHandler>
+  <!-- Update Processors
+       Chains of Update Processor Factories for dealing with Update
+       Requests can be declared, and then used by name in Update
+       Request Processors
+    --> 
+  <queryResponseWriter name="json" class="solr.JSONResponseWriter">
+     <!-- For the purposes of the tutorial, JSON responses are written as
+      plain text so that they are easy to read in *any* browser.
+      If you expect a MIME type of "application/json" just remove this override.
+     -->
+    <str name="content-type">text/plain; charset=UTF-8</str>
+  </queryResponseWriter>
+  <!--
+     Custom response writers can be declared as needed...
+    -->
+    <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/>
+  <!-- XSLT response writer transforms the XML output by any xslt file found
+       in Solr's conf/xslt directory.  Changes to xslt files are checked for
+       every xsltCacheLifetimeSeconds.  
+    -->
+  <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
+    <int name="xsltCacheLifetimeSeconds">5</int>
+  </queryResponseWriter>
+  <!-- Query Parsers
+       Multiple QParserPlugins can be registered by name, and then
+       used in either the "defType" param for the QueryComponent (used
+       by SearchHandler) or in LocalParams
+    -->
+  <!-- Legacy config for the admin interface -->
+  <admin>
+    <defaultQuery>*:*</defaultQuery>
+  </admin>

Modified: stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/
--- stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/ (original)
+++ stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/ Wed Jun  5 07:23:15 2013
@@ -175,13 +175,14 @@ public class TopicEngineTest extends Emb
         assertEquals(classifier.acceptedLanguages, new ArrayList<String>());
         // check some required attributes
-        Hashtable<String,Object> configWithMissingTopicField = new Hashtable<String,Object>();
-        configWithMissingTopicField.putAll(config);
-        configWithMissingTopicField.remove(TopicClassificationEngine.CONCEPT_URI_FIELD);
-        try {
-            TopicClassificationEngine.fromParameters(configWithMissingTopicField);
-            fail("Should have raised a ConfigurationException");
-        } catch (ConfigurationException e) {}
+// NOTE: This is no longer a required field; a default value is used instead
+//        Hashtable<String,Object> configWithMissingTopicField = new Hashtable<String,Object>();
+//        configWithMissingTopicField.putAll(config);
+//        configWithMissingTopicField.remove(TopicClassificationEngine.CONCEPT_URI_FIELD);
+//        try {
+//            TopicClassificationEngine.fromParameters(configWithMissingTopicField);
+//            fail("Should have raised a ConfigurationException");
+//        } catch (ConfigurationException e) {}
         Hashtable<String,Object> configWithMissingEngineName = new Hashtable<String,Object>();
@@ -594,22 +595,23 @@ public class TopicEngineTest extends Emb
     protected Hashtable<String,Object> getDefaultClassifierConfigParams() {
         Hashtable<String,Object> config = new Hashtable<String,Object>();
         config.put(EnhancementEngine.PROPERTY_NAME, "test-engine");
-        config.put(TopicClassificationEngine.ENTRY_ID_FIELD, "entry_id");
-        config.put(TopicClassificationEngine.ENTRY_TYPE_FIELD, "entry_type");
-        config.put(TopicClassificationEngine.MODEL_ENTRY_ID_FIELD, "model_entry_id");
         config.put(TopicClassificationEngine.SOLR_CORE, classifierSolrServer);
-        config.put(TopicClassificationEngine.CONCEPT_URI_FIELD, "concept");
-        config.put(TopicClassificationEngine.PRIMARY_TOPIC_URI_FIELD, "primary_topic");
-        config.put(TopicClassificationEngine.SIMILARTITY_FIELD, "classifier_features");
-        config.put(TopicClassificationEngine.BROADER_FIELD, "broader");
-        config.put(TopicClassificationEngine.MODEL_UPDATE_DATE_FIELD, "last_update_dt");
-        config.put(TopicClassificationEngine.MODEL_EVALUATION_DATE_FIELD, "last_evaluation_dt");
-        config.put(TopicClassificationEngine.PRECISION_FIELD, "precision");
-        config.put(TopicClassificationEngine.RECALL_FIELD, "recall");
-        config.put(TopicClassificationEngine.POSITIVE_SUPPORT_FIELD, "positive_support");
-        config.put(TopicClassificationEngine.NEGATIVE_SUPPORT_FIELD, "negative_support");
-        config.put(TopicClassificationEngine.FALSE_POSITIVES_FIELD, "false_positives");
-        config.put(TopicClassificationEngine.FALSE_NEGATIVES_FIELD, "false_negatives");
+        // these are now optional properties
+//        config.put(TopicClassificationEngine.ENTRY_ID_FIELD, "entry_id");
+//        config.put(TopicClassificationEngine.ENTRY_TYPE_FIELD, "entry_type");
+//        config.put(TopicClassificationEngine.MODEL_ENTRY_ID_FIELD, "model_entry_id");
+//        config.put(TopicClassificationEngine.CONCEPT_URI_FIELD, "concept");
+//        config.put(TopicClassificationEngine.PRIMARY_TOPIC_URI_FIELD, "primary_topic");
+//        config.put(TopicClassificationEngine.SIMILARTITY_FIELD, "classifier_features");
+//        config.put(TopicClassificationEngine.BROADER_FIELD, "broader");
+//        config.put(TopicClassificationEngine.MODEL_UPDATE_DATE_FIELD, "last_update_dt");
+//        config.put(TopicClassificationEngine.MODEL_EVALUATION_DATE_FIELD, "last_evaluation_dt");
+//        config.put(TopicClassificationEngine.PRECISION_FIELD, "precision");
+//        config.put(TopicClassificationEngine.RECALL_FIELD, "recall");
+//        config.put(TopicClassificationEngine.POSITIVE_SUPPORT_FIELD, "positive_support");
+//        config.put(TopicClassificationEngine.NEGATIVE_SUPPORT_FIELD, "negative_support");
+//        config.put(TopicClassificationEngine.FALSE_POSITIVES_FIELD, "false_positives");
+//        config.put(TopicClassificationEngine.FALSE_NEGATIVES_FIELD, "false_negatives");
         return config;