You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2011/02/17 13:00:20 UTC

svn commit: r1071590 [1/3] - in /incubator/stanbol/trunk/entityhub: ./ generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/ indexing/dblp/ indexing/dblp/solrConf/ indexing/dblp/solrConf/dblp/ indexing/dblp/solrConf/dblp/...

Author: rwesten
Date: Thu Feb 17 12:00:19 2011
New Revision: 1071590

URL: http://svn.apache.org/viewvc?rev=1071590&view=rev
Log:
Indexer for dblp (see README.txt for details)

other stuff:
 - added the swrc namespace prefix to the namespace enum.
 - corrected some bugs in the RdfIndexer

Open Issues:

The Code for this Indexer is mainly the same as for the dbPedia one. The main differences are the defaults.
The open Issue is to implement a generic version of Indexers for DataSets that are available as RDF dumps.
This Indexer and the dbPedia Indexer can then be replaced by configurations for the more generic one.

Added:
    incubator/stanbol/trunk/entityhub/indexing/dblp/
    incubator/stanbol/trunk/entityhub/indexing/dblp/README.txt   (with props)
    incubator/stanbol/trunk/entityhub/indexing/dblp/pom.xml   (with props)
    incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/
    incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/
    incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/
    incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/admin-extra.html   (with props)
    incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/elevate.xml   (with props)
    incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/mapping-ISOLatin1Accent.txt   (with props)
    incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/protwords.txt   (with props)
    incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/schema.xml   (with props)
    incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/scripts.conf
    incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/solrconfig.xml   (with props)
    incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/spellings.txt   (with props)
    incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/stopwords.txt   (with props)
    incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/synonyms.txt   (with props)
    incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/xslt/
    incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/xslt/example.xsl   (with props)
    incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/xslt/example_atom.xsl   (with props)
    incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/xslt/example_rss.xsl   (with props)
    incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/xslt/luke.xsl   (with props)
    incubator/stanbol/trunk/entityhub/indexing/dblp/src/
    incubator/stanbol/trunk/entityhub/indexing/dblp/src/main/
    incubator/stanbol/trunk/entityhub/indexing/dblp/src/main/java/
    incubator/stanbol/trunk/entityhub/indexing/dblp/src/main/java/org/
    incubator/stanbol/trunk/entityhub/indexing/dblp/src/main/java/org/apache/
    incubator/stanbol/trunk/entityhub/indexing/dblp/src/main/java/org/apache/stanbol/
    incubator/stanbol/trunk/entityhub/indexing/dblp/src/main/java/org/apache/stanbol/entityhub/
    incubator/stanbol/trunk/entityhub/indexing/dblp/src/main/java/org/apache/stanbol/entityhub/indexing/
    incubator/stanbol/trunk/entityhub/indexing/dblp/src/main/java/org/apache/stanbol/entityhub/indexing/dblp/
    incubator/stanbol/trunk/entityhub/indexing/dblp/src/main/java/org/apache/stanbol/entityhub/indexing/dblp/cli/
    incubator/stanbol/trunk/entityhub/indexing/dblp/src/main/java/org/apache/stanbol/entityhub/indexing/dblp/cli/CommandLineRunner.java   (with props)
Modified:
    incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java
    incubator/stanbol/trunk/entityhub/indexing/genericRdf/src/main/java/org/apache/stanbol/entityhub/indexing/rdf/RdfIndexer.java
    incubator/stanbol/trunk/entityhub/pom.xml

Modified: incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java?rev=1071590&r1=1071589&r2=1071590&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java (original)
+++ incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java Thu Feb 17 12:00:19 2011
@@ -62,6 +62,7 @@ public enum NamespaceEnum {
     bio("dc-bio","http://purl.org/vocab/bio/0.1/"),
     rss("http://purl.org/rss/1.0/"),
     goodRelations("gr","http://purl.org/goodrelations/v1#"),
+    swrc("http://swrc.ontoware.org/ontology#"), //The Semantic Web for Research Communities Ontology
     //Linked Data Ontologies
     dbpediaOnt("dbp-ont","http://dbpedia.org/ontology/"),
     dbpediaProp("dbp-prop","http://dbpedia.org/property/"),

Added: incubator/stanbol/trunk/entityhub/indexing/dblp/README.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/dblp/README.txt?rev=1071590&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/dblp/README.txt (added)
+++ incubator/stanbol/trunk/entityhub/indexing/dblp/README.txt Thu Feb 17 12:00:19 2011
@@ -0,0 +1,38 @@
+Indexer for the DBLP dataset (see http://dblp.uni-trier.de/)
+
+This Tool creates a full cache for DBLP based on the RDF Dump available at
+http://dblp.l3s.de/dblp.rdf.gz
+
+Building:
+========
+If not yet built by the build process of the entityhub, call
+   mvn install
+in this directory.
+
+To create the runnable jar that contains all the dependencies, call
+   mvn assembly:assembly
+   
+If everything completes successfully, then there should be two jar files within
+the target directory.
+The one called 
+   org.apache.stanbol.entityhub.indexing.dblp-0.1*-jar-with-dependencies.jar
+is the one to be used for indexing.
+
+Creating the index:
+==================
+
+(1) download the dump from http://dblp.l3s.de/dblp.rdf.gz
+(2) rename the dump file to "dblp.nt.gz" to allow the RdfIndexer to correctly 
+    set the RDF format to NTRIPLES!
+(3) The Indexer will need a SolrServer. So you need to prepare the Solr Index
+    to store the data.
+    A default configuration is provided within the "/solrConf" directory. This
+    can be used to configure a SolrServer or a new Core to an existing SolrServer.
+    You can also copy the "dblp" folder within the "/solrConf" directory to
+    another location and then pass the absolute path as SolrServer location to the
+    Tool. In that case an EmbeddedSolrServer will be used for indexing. 
+(4) call the tool with the -h option to print the help screen
+    java -jar ./target/org.apache.stanbol.entityhub.indexing.dblp-*-jar-with-dependencies.jar -h
+
+Indexing took about 3h on my Computer. Indexing time heavily depends on the
+used hard disc.
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/dblp/README.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/dblp/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/dblp/pom.xml?rev=1071590&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/dblp/pom.xml (added)
+++ incubator/stanbol/trunk/entityhub/indexing/dblp/pom.xml Thu Feb 17 12:00:19 2011
@@ -0,0 +1,155 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+	<modelVersion>4.0.0</modelVersion>
+<!-- 
+	<parent>
+		<groupId>org.apache.stanbol</groupId>
+		<artifactId>org.apache.stanbol.entityhub.parent</artifactId>
+		<version>0.1-SNAPSHOT</version>
+		<relativePath>../../parent</relativePath>
+	</parent>
+-->
+	<groupId>org.apache.stanbol</groupId>
+	<artifactId>org.apache.stanbol.entityhub.indexing.dblp</artifactId>
+	<packaging>jar</packaging>
+	<version>0.1-SNAPSHOT</version>
+	<name>Apache Stanbol Entityhub Indexing for dblp</name>
+	<description>This uses the RDF dump provided by 
+        http://dblp.l3s.de/dblp.rdf.gz to create a Full Cache 
+        for DBLP Computer Science Bibliography 
+        (http://dblp.uni-trier.de) within an SolrYard
+    </description>
+  <scm>
+    <connection>
+      scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/entityhub/indexing/dblp
+    </connection>
+    <developerConnection>
+      scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/entityhub/indexing/dblp
+    </developerConnection>
+    <url>http://incubator.apache.org/stanbol</url>
+  </scm>
+	<dependencies>
+        <!-- for logging -->
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+            <version>1.5.8</version>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-simple</artifactId>
+            <version>1.5.8</version>
+            <scope>runtime</scope>
+        </dependency>
+        <!-- RICK dependencies -->
+		<dependency>
+			<groupId>org.apache.stanbol</groupId>
+			<artifactId>org.apache.stanbol.entityhub.servicesapi</artifactId>
+			<version>0.1-SNAPSHOT</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.stanbol</groupId>
+			<artifactId>org.apache.stanbol.entityhub.core</artifactId>
+			<version>0.1-SNAPSHOT</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.stanbol</groupId>
+			<artifactId>org.apache.stanbol.entityhub.yard.solr</artifactId>
+			<version>0.1-SNAPSHOT</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.stanbol</groupId>
+			<artifactId>org.apache.stanbol.entityhub.core</artifactId>
+			<version>0.1-SNAPSHOT</version>
+		</dependency>
+        <dependency>
+            <groupId>org.apache.stanbol</groupId>
+            <artifactId>org.apache.stanbol.entityhub.indexing.genericRdf</artifactId>
+            <version>0.1-SNAPSHOT</version>
+        </dependency>
+       <!-- for the main -->
+		<dependency>
+			<groupId>commons-cli</groupId>
+			<artifactId>commons-cli</artifactId>
+			<version>1.2</version>
+		</dependency>
+        <dependency>
+            <groupId>commons-io</groupId>
+            <artifactId>commons-io</artifactId>
+           <version>1.4</version>
+        </dependency>
+	    <dependency>
+	      <groupId>org.apache.commons</groupId>
+	      <artifactId>commons-compress</artifactId>
+	      <version>1.0</version>
+	    </dependency>
+        <!-- osgi stuff -->
+        <dependency>
+          <groupId>org.apache.felix</groupId>
+          <artifactId>org.apache.felix.configadmin</artifactId>
+          <version>1.2.4</version>
+          <exclusions>
+            <exclusion>
+              <groupId>org.apache.felix</groupId>
+              <artifactId>org.osgi.foundation</artifactId>
+            </exclusion>
+          </exclusions>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.felix</groupId>
+          <artifactId>org.osgi.core</artifactId>
+          <version>1.4.0</version>
+        </dependency>
+		<!-- for testing -->
+		<dependency>
+			<groupId>junit</groupId>
+			<artifactId>junit</artifactId>
+			<version>4.7</version>
+			<scope>test</scope>
+		</dependency>
+	</dependencies>
+	<build>
+		<plugins>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-compiler-plugin</artifactId>
+				<configuration>
+					<source>1.6</source>
+					<target>1.6</target>
+				</configuration>
+			</plugin>
+			<plugin>
+				<artifactId>maven-assembly-plugin</artifactId>
+				<version>2.2</version>
+				<configuration>
+					<descriptorRefs>
+						<descriptorRef>jar-with-dependencies</descriptorRef>
+					</descriptorRefs>
+					<archive>
+						<manifest>
+							<mainClass>org.apache.stanbol.entityhub.indexing.dblp.cli.CommandLineRunner</mainClass>
+						</manifest>
+					</archive>
+				</configuration>
+			</plugin>
+		</plugins>
+	</build>
+</project>

Propchange: incubator/stanbol/trunk/entityhub/indexing/dblp/pom.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/admin-extra.html
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/admin-extra.html?rev=1071590&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/admin-extra.html (added)
+++ incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/admin-extra.html Thu Feb 17 12:00:19 2011
@@ -0,0 +1,31 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- The content of this page will be statically included into the top
+of the admin page.  Uncomment this as an example to see where the content
+will show up.
+
+<hr>
+<i>This line will appear before the first table</i>
+<tr>
+<td colspan="2">
+This row will be appended to the end of the first table
+</td>
+</tr>
+<hr>
+
+-->

Propchange: incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/admin-extra.html
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/elevate.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/elevate.xml?rev=1071590&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/elevate.xml (added)
+++ incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/elevate.xml Thu Feb 17 12:00:19 2011
@@ -0,0 +1,37 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- If this file is found in the config directory, it will only be
+     loaded once at startup.  If it is found in Solr's data
+     directory, it will be re-loaded every commit.
+-->
+
+<elevate>
+<!--
+ <query text="foo bar">
+  <doc id="1" />
+  <doc id="2" />
+  <doc id="3" />
+ </query>
+ 
+ <query text="ipod">
+   <doc id="MA147LL/A" />  <!- put the actual ipod at the top ->
+   <doc id="IW-02" exclude="true" /> <!- exclude this cable ->
+ </query>
+-->
+</elevate>

Propchange: incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/elevate.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/mapping-ISOLatin1Accent.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/mapping-ISOLatin1Accent.txt?rev=1071590&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/mapping-ISOLatin1Accent.txt (added)
+++ incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/mapping-ISOLatin1Accent.txt Thu Feb 17 12:00:19 2011
@@ -0,0 +1,246 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Syntax:
+#   "source" => "target"
+#     "source".length() > 0 (source cannot be empty.)
+#     "target".length() >= 0 (target can be empty.)
+
+# example:
+#   "À" => "A"
+#   "\u00C0" => "A"
+#   "\u00C0" => "\u0041"
+#   "ß" => "ss"
+#   "\t" => " "
+#   "\n" => ""
+
+# À => A
+"\u00C0" => "A"
+
+# Á => A
+"\u00C1" => "A"
+
+# Â => A
+"\u00C2" => "A"
+
+# Ã => A
+"\u00C3" => "A"
+
+# Ä => A
+"\u00C4" => "A"
+
+# Å => A
+"\u00C5" => "A"
+
+# Æ => AE
+"\u00C6" => "AE"
+
+# Ç => C
+"\u00C7" => "C"
+
+# È => E
+"\u00C8" => "E"
+
+# É => E
+"\u00C9" => "E"
+
+# Ê => E
+"\u00CA" => "E"
+
+# Ë => E
+"\u00CB" => "E"
+
+# Ì => I
+"\u00CC" => "I"
+
+# Í => I
+"\u00CD" => "I"
+
+# Î => I
+"\u00CE" => "I"
+
+# Ï => I
+"\u00CF" => "I"
+
+# Ĳ => IJ
+"\u0132" => "IJ"
+
+# Ð => D
+"\u00D0" => "D"
+
+# Ñ => N
+"\u00D1" => "N"
+
+# Ò => O
+"\u00D2" => "O"
+
+# Ó => O
+"\u00D3" => "O"
+
+# Ô => O
+"\u00D4" => "O"
+
+# Õ => O
+"\u00D5" => "O"
+
+# Ö => O
+"\u00D6" => "O"
+
+# Ø => O
+"\u00D8" => "O"
+
+# Œ => OE
+"\u0152" => "OE"
+
+# Þ => TH
+"\u00DE" => "TH"
+
+# Ù => U
+"\u00D9" => "U"
+
+# Ú => U
+"\u00DA" => "U"
+
+# Û => U
+"\u00DB" => "U"
+
+# Ü => U
+"\u00DC" => "U"
+
+# Ý => Y
+"\u00DD" => "Y"
+
+# Ÿ => Y
+"\u0178" => "Y"
+
+# à => a
+"\u00E0" => "a"
+
+# á => a
+"\u00E1" => "a"
+
+# â => a
+"\u00E2" => "a"
+
+# ã => a
+"\u00E3" => "a"
+
+# ä => a
+"\u00E4" => "a"
+
+# å => a
+"\u00E5" => "a"
+
+# æ => ae
+"\u00E6" => "ae"
+
+# ç => c
+"\u00E7" => "c"
+
+# è => e
+"\u00E8" => "e"
+
+# é => e
+"\u00E9" => "e"
+
+# ê => e
+"\u00EA" => "e"
+
+# ë => e
+"\u00EB" => "e"
+
+# ì => i
+"\u00EC" => "i"
+
+# í => i
+"\u00ED" => "i"
+
+# î => i
+"\u00EE" => "i"
+
+# ï => i
+"\u00EF" => "i"
+
+# ĳ => ij
+"\u0133" => "ij"
+
+# ð => d
+"\u00F0" => "d"
+
+# ñ => n
+"\u00F1" => "n"
+
+# ò => o
+"\u00F2" => "o"
+
+# ó => o
+"\u00F3" => "o"
+
+# ô => o
+"\u00F4" => "o"
+
+# õ => o
+"\u00F5" => "o"
+
+# ö => o
+"\u00F6" => "o"
+
+# ø => o
+"\u00F8" => "o"
+
+# œ => oe
+"\u0153" => "oe"
+
+# ß => ss
+"\u00DF" => "ss"
+
+# þ => th
+"\u00FE" => "th"
+
+# ù => u
+"\u00F9" => "u"
+
+# ú => u
+"\u00FA" => "u"
+
+# û => u
+"\u00FB" => "u"
+
+# ü => u
+"\u00FC" => "u"
+
+# ý => y
+"\u00FD" => "y"
+
+# ÿ => y
+"\u00FF" => "y"
+
+# ﬀ => ff
+"\uFB00" => "ff"
+
+# ﬁ => fi
+"\uFB01" => "fi"
+
+# ﬂ => fl
+"\uFB02" => "fl"
+
+# ﬃ => ffi
+"\uFB03" => "ffi"
+
+# ﬄ => ffl
+"\uFB04" => "ffl"
+
+# ﬅ => ft
+"\uFB05" => "ft"
+
+# ﬆ => st
+"\uFB06" => "st"

Propchange: incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/mapping-ISOLatin1Accent.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/protwords.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/protwords.txt?rev=1071590&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/protwords.txt (added)
+++ incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/protwords.txt Thu Feb 17 12:00:19 2011
@@ -0,0 +1,21 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# Use a protected word file to protect against the stemmer reducing two
+# unrelated words to the same base word.
+
+# Some non-words that normally won't be encountered,
+# just to test that they won't be stemmed.
+dontstems
+zwhacky
+

Propchange: incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/protwords.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/schema.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/schema.xml?rev=1071590&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/schema.xml (added)
+++ incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/schema.xml Thu Feb 17 12:00:19 2011
@@ -0,0 +1,458 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--  
+ This is the Solr schema file. This file should be named "schema.xml" and
+ should be in the conf directory under the solr home
+ (i.e. ./solr/conf/schema.xml by default) 
+ or located where the classloader for the Solr webapp can find it.
+
+ This schema will be used by the Apache Stanbol SolrYard implementation to
+ index entities.
+ The implementation of the SolrYard does make several assumptions on
+ configurations defined in this Schema. So changes to this schema that do
+ affect such assumptions will most likely cause unpredictable errors!
+ 
+ However there are also a lot of places where users can optimize this schema
+ to specific requirements. See the comments within this schema for more
+ details!
+
+ For more information, on how to customize the Solr schema.xml in general, 
+ please see http://wiki.apache.org/solr/SchemaXml.
+
+-->
+
+<schema name="Apache Stanbol SolrYard Schema" version="1.2">
+  <!--
+    The SolrYard supports a list of types that is reflected by
+    "fieldType" specifications within this schema.
+    See the specific fieldType definition for more information
+  -->
+  <types>
+    <!-- 
+      This fieldType is used to store values with the dataType "xsd:string".
+      It is NOT used for natural language texts. Assume that this data type is
+      used for ISBN numbers, article numbers, string representations of
+      unsupported data types ...
+    -->
+    <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
+    <!-- 
+      This can be used as alternative to "string" to enable case insensitive
+      searches on string values.
+      The KeywordTokenizerFactory ensures that the whole string is preserved as
+      a single token.
+    -->
+    <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.KeywordTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory" />
+      </analyzer>
+    </fieldType>
+
+
+    <!-- boolean type: "true" or "false" used to store values with the datatype "xsd:boolean" -->
+    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
+    <!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings.
+        Currently not used by the SolrYard implementation, but reserved for future use. -->
+    <fieldtype name="binary" class="solr.BinaryField"/>
+    <!--
+      Default numeric and date field types. By default used to index numeric values.
+      Note that the "solr.TrieIntField" does support indexing values at various
+      levels of precision to accelerate range queries. However the
+      precisionStep of 0 used by these fieldTypes disables this feature.
+      Change precisionStep to values > 0 to activate hierarchical indexing
+      for all numeric fields of those types. See Solr documentation for
+      suitable values and examples.
+    -->
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
+
+    <!--
+      Numeric and date field types that do activate indexing values at various
+      levels of precision to accelerate range queries.
+      This can be used to activate hierarchical indexing for specific
+      fields. See Notes within the field section.
+    -->
+    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
+
+    <!-- 
+      Natural Language Texts
+      
+      Indexing of natural language texts is supported by the solr.TextField class that
+      allows the specification of custom text analyzers specified as a tokenizer and a 
+      list of token filters.
+      
+      For more info on customizing your analyzer chain, please see
+      http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
+      
+      The SolrYard has support for different languages. Within the schema.xml one needs
+      to define first a "fieldType" for language and second a dynamicField for the
+      prefix used by the SolrYard for this language.
+      For more information about the prefixes used by the SolrYard see the notes in the
+      field section of this configuration.
+      
+      In addition the SolrYard also indexes natural language values (of any language)
+      together with string values within a special field to support searches for
+      texts without a specified language.
+    -->
+    <!-- 
+      A general unstemmed text field - good if one does not know the language of the field.
+      This is used as the default fieldType for fields that store values of different
+      languages.
+      It is also the default fieldType for languages that do not define special fieldTypes.
+    -->
+    <fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+    
+    <!-- 
+      A text field that only splits on whitespace for exact matching of words.
+      Currently not used. May be used as an alternative to the textgen fieldType.
+    -->
+    <!--
+    <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+      </analyzer>
+    </fieldType>
+    -->
+    
+    <!-- 
+      This is the default fieldType used for english language texts.
+      
+      Less flexible matching than the text_en field type, but fewer false matches.
+      Probably not ideal for product names, but may be good for SKUs. 
+      Can insert dashes in the wrong place and still match.
+    -->
+    <fieldType name="text_en_Tight" class="solr.TextField" positionIncrementGap="100" >
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
+        <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
+             possible with WordDelimiterFilter in conjunction with stemming. -->
+        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+
+    <!-- 
+      This can be used as an alternative to the "text_en_Tight" fieldType for
+      english language texts.
+      
+      A text field that uses WordDelimiterFilter to enable splitting and matching of
+      words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
+      so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
+      Synonyms and stopwords are customized by external files, and stemming is enabled.
+    -->
+    <!--
+    <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
+      </analyzer>
+    </fieldType>
+    -->
+    
+    <!--
+      The SolrYard allows leading Wildcards (e.g. "*aris"). To provide
+      good query performance for such queries one needs to configure
+      fieldTypes that use the ReversedWildcardFilterFactory as shown by
+      this example.
+      See Solr documentation for details
+      
+      A general unstemmed text field that indexes tokens normally and also
+      reversed (via ReversedWildcardFilterFactory), to enable more efficient 
+	  leading wildcard queries. 
+    -->
+    <!--
+    <fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+    -->
+    <!-- charFilter + WhitespaceTokenizer  -->
+    <!--
+    <fieldType name="textCharNorm" class="solr.TextField" positionIncrementGap="100" >
+      <analyzer>
+        <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+      </analyzer>
+    </fieldType>
+    -->
+
+    <!--
+      This can be used to deactivate some functionality of the SolrYard or
+      to configure that some fields of a data set are not stored nor indexed
+      regardless of the Apache Stanbol Entityhub configuration!
+    --> 
+    <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" /> 
+
+ </types>
+
+
+ <fields>
+   <!-- 
+     For Information about the different attributes for fields
+     see http://wiki.apache.org/solr/SchemaXml. 
+   -->
+    <!--
+      The uri field is used as ID for documents indexed by the SolrYard.
+      Do not change this definition!
+    -->
+   <field name="uri" type="string" indexed="true" stored="true" required="true" />
+
+   <!-- 
+     used to index all natural language texts of all fields of a document
+    (via copyField). This is used as default search field.
+    The type may be changed.
+     -->
+   <field name="_text" type="textgen" indexed="true" stored="false" multiValued="true"/>
+   <!-- 
+     used to store all references of the document (via copyField).
+     This field may be used to search for related entities.
+     Do not change this definition!
+    -->
+   <field name="_ref" type="string" indexed="true" stored="false" multiValued="true"/>
+   <!-- 
+     Field used to store the domain in case multiple datasets are stored within the same
+     Solr index (search for "Multi Yard Layout" for details).
+     Do not change this definition!
+   -->
+   <field name="_domain" type="string" indexed="true" stored="false" multiValued="true"/>
+
+   <!-- 
+     Dynamic field definitions (used if a field name is not found)
+     see http://wiki.apache.org/solr/SchemaXml for details.
+     
+     The SolrYard heavily uses dynamic fields to index fields based on their
+     data type.
+     Notes:
+       - By default all dynamicField specifications use multiValued="true"
+         because this schema.xml does not make any assumptions on the data stored.
+         Even a boolean field may define both "true" and "false" as values.
+       - The SolrYard currently uses only prefixes to represent data types 
+         and languages. However postfixes are reserved for future extensions.
+         That means that postfixes MUST NOT be used for dynamicField definitions
+   -->
+   <!--
+     Used for all fields with boolean data type.
+     One can change multivalued to "false" in case the indexed data conform to
+     this restriction
+   -->
+   <dynamicField name="bool/*" type="boolean" indexed="true"  stored="true" multiValued="true"/>
+   <!--
+     Numeric dataTypes.
+     Note that the type can be changed to fieldTypes supporting hierarchical
+     indexing of values to increase performance of ranged queries
+   -->
+   <dynamicField name="int/*"  type="int"     indexed="true"  stored="true" multiValued="true"/>
+   <dynamicField name="lon/*"  type="long"    indexed="true"  stored="true" multiValued="true"/>
+   <dynamicField name="flo/*"  type="float"   indexed="true"  stored="true" multiValued="true"/>
+   <dynamicField name="dou/*"  type="double"  indexed="true"  stored="true" multiValued="true"/>
+   <dynamicField name="cal/*"  type="date"    indexed="true"  stored="true" multiValued="true"/>
+   <dynamicField name="dur/*"  type="string"  indexed="true"  stored="true" multiValued="true"/>
+   <!-- 
+     String fields that are not natural language
+     To support case insensitive searches in such fields change 
+     the type to "lowercase"
+   -->
+   <dynamicField name="str/*"  type="string"  indexed="true"  stored="true" multiValued="true"/>
+   <!-- 
+     references are values that represent IDs of other resources.
+     Typically this will store URIs but in principle also other IDs
+     could be used.
+   -->
+   <dynamicField name="ref/*"  type="string"  indexed="true"  stored="true" multiValued="true"/>
+
+   <!--
+     DynamicFields representing natural language texts.
+     
+     The type of such fields may be changed to one of the alternatives 
+     described in the types section of this configuration.
+
+     The SolrYard prefixes natural language texts with "@"
+     followed by the defined language.
+     Currently no processing of the language is done. So
+     defining the language "1xx5zr7" for Text will create
+     a field with the prefix "@1xx5zr7".
+     
+     To define a special field type for a language one has
+     to use "@" plus the key for the language as prefix.
+     See the specification for English language texts as example.
+     
+     Also prefixes for country specific languages can be
+     realized by defining dynamic fields as follows:
+       use @en* to match en-GB and en-US
+       use @en/* to match only en but not en-GB or en-US
+       use @en-GB/* and @en* to have a special field type for 
+         en-GB and one for other english text
+   -->
+   <!-- 
+     Dynamic field for english languages.
+     Note that the prefix "@en*" matches also "@en-GB" and "@en-US"
+   -->
+   <dynamicField name="@en*"  type="text_en_Tight" indexed="true" stored="true" multiValued="true"/>
+   <!-- 
+     The "@*" catches all the other languages including "@/" 
+     (default language) used for texts without a defined language
+   -->
+   <dynamicField name="@*"  type="textgen"  indexed="true"  stored="true" multiValued="true"/>
+
+   <!--
+     To add special configurations for specific fields one
+     has to include the fieldName within the prefix of the
+     dynamicField specification.
+     The SolrYard uses namespace prefixes to generate
+     field names. Where available, the prefixes defined in the
+     NamespaceEnum of the Entityhub are used.
+     Currently there is no way to define used prefixes for
+     other namespaces.
+     
+     This example shows how to activate lower case search
+     for the dcmi-terms format property
+  <dynamicField name="str/dc:format*" type="lowercase" indexed="true"  stored="true" multiValued="true"/>
+     
+     This example shows how to activate fast ranged queries
+     for spatial searches
+  <dynamicField name="dou/geo:lat*" type="tdouble" indexed="true"  stored="true" multiValued="true"/>
+  <dynamicField name="dou/geo:lon*" type="tdouble" indexed="true"  stored="true" multiValued="true"/>
+  <dynamicField name="int/geo:alt*" type="tint" indexed="true"  stored="true" multiValued="true"/>
+     
+     This example defines to use the type "text_en" for
+     english language rdfs:comment values.
+   
+   <dynamicField name="@en/rdfs:comment*"  type="text_en"  indexed="true"  stored="true" multiValued="true"/>
+     
+     Note that this would not match "en-GB" nor "en-US".
+   -->
+    
+    
+   <!--
+     The following dynamic field specifications define fields used
+     by the SolrYard implementation for special purposes.
+   -->
+         
+   <!-- 
+     the "_!@*" fields contain a copy of all languages AND string values
+     for that field. This field is used for text queries with no specified
+     language.
+     This field need not be stored. The type can be changed to alternatives
+     as described in the types section of this configuration.
+   -->
+   <dynamicField name="_!@*"  type="textgen"  indexed="true"  stored="false" multiValued="true"/>
+   <!-- 
+     fields starting with "_config/" are used to store configurations about how the
+     index was created within the index (e.g. used namespace prefixes).
+     Do not change this definition!
+   -->
+   <dynamicField name="_config/*" type="string" indexed="false" multiValued="true"/>
+      
+ </fields>
+
+ <!-- 
+   Field to use to determine and enforce document uniqueness.
+   -->
+ <uniqueKey>uri</uniqueKey>
+
+ <!-- 
+   field for the QueryParser to use when an explicit fieldname is absent.
+   The SolrYard does currently not take advantage of this. However it can
+   be used when directly accessing the SolrYard.
+ -->
+ <defaultSearchField>_text</defaultSearchField>
+
+ <!--
+   The SolrYard explicitly adds AND and OR for all boolean terms in
+   generated queries. So changing that should have no influence on
+   the SolrYard (not tested) 
+   
+   SolrQueryParser configuration: defaultOperator="AND|OR" 
+ -->
+ <solrQueryParser defaultOperator="OR"/>
+
+  <!--
+    The SolrYard Implementation assumes the following copyField commands.
+    These commands MUST NOT be removed! 
+   -->
+
+   <!-- 
+     Values of all fields that represent natural language texts
+     or string values are copied to the default search field
+     "_text".
+     Currently the SolrYard does not use this field, but it is
+     reserved for future usage and MUST therefore already be
+     included when indexing documents
+   -->
+   <copyField source="@*" dest="_text"/>
+   <copyField source="str/*" dest="_text"/>
+   <!--
+     All references to other entities (documents) need to be
+     copied to the "_ref" field.
+     This field is required to query for dependencies of other
+     documents (e.g. when one needs to remove a document and
+     all references to it)
+   -->
+   <copyField source="ref/*" dest="_ref"/>
+   
+</schema>

Propchange: incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/schema.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/scripts.conf
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/scripts.conf?rev=1071590&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/scripts.conf (added)
+++ incubator/stanbol/trunk/entityhub/indexing/dblp/solrConf/dblp/conf/scripts.conf Thu Feb 17 12:00:19 2011
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+user=
+solr_hostname=localhost
+solr_port=8983
+rsyncd_port=18983
+data_dir=
+webapp_name=solr
+master_host=
+master_data_dir=
+master_status_dir=