You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2011/04/29 11:20:38 UTC
svn commit: r1097740 [7/10] - in /incubator/stanbol/trunk: entityhub/ entityhub/generic/core/src/main/java/org/apache/stanbol/entityhub/core/mapping/ entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/mapping/ entityhu...

Added: incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_atom.xsl
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_atom.xsl?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_atom.xsl (added)
+++ incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_atom.xsl Fri Apr 29 09:20:31 2011
@@ -0,0 +1,67 @@
+<?xml version='1.0' encoding='UTF-8'?>
+
+<!-- 
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ -->
+
+<!-- 
+  Simple transform of Solr query results to Atom
+ -->
+
+<xsl:stylesheet version='1.0'
+    xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>
+
+  <xsl:output
+       method="xml"
+       encoding="utf-8"
+       media-type="text/xml; charset=UTF-8"
+  />
+  <!-- Root template: emits the Atom feed header, then one entry per result doc. -->
+  <xsl:template match='/'>
+    <xsl:variable name="query" select="response/lst[@name='responseHeader']/lst[@name='params']/str[@name='q']"/>
+    <feed xmlns="http://www.w3.org/2005/Atom">
+      <title>Example Solr Atom 1.0 Feed</title>
+      <subtitle>
+       This has been formatted by the sample "example_atom.xsl" transform -
+       use your own XSLT to get a nicer Atom feed.
+      </subtitle>
+      <author>
+        <name>Apache Solr</name>
+        <email>solr-user@lucene.apache.org</email>
+      </author>
+      <link rel="self" type="application/atom+xml" 
+            href="http://localhost:8983/solr/select?q={$query}&amp;wt=xslt&amp;tr=example_atom.xsl"/>
+      <updated>
+        <xsl:value-of select="response/result/doc[position()=1]/date[@name='timestamp']"/>
+      </updated>
+      <id>tag:localhost,2007:example</id>
+      <xsl:apply-templates select="response/result/doc"/>
+    </feed>
+  </xsl:template>
+    
+  <!-- One Atom entry per Solr result doc; the link looks the doc up by its id field. -->
+  <xsl:template match="doc">
+    <xsl:variable name="id" select="str[@name='id']"/>
+    <entry>
+      <title><xsl:value-of select="str[@name='name']"/></title>
+      <link href="http://localhost:8983/solr/select?q=id:{$id}"/>
+      <id>tag:localhost,2007:<xsl:value-of select="$id"/></id>
+      <summary><xsl:value-of select="arr[@name='features']"/></summary>
+      <updated><xsl:value-of select="date[@name='timestamp']"/></updated>
+    </entry>
+  </xsl:template>
+
+</xsl:stylesheet>

Propchange: incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_atom.xsl
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_rss.xsl
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_rss.xsl?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_rss.xsl (added)
+++ incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_rss.xsl Fri Apr 29 09:20:31 2011
@@ -0,0 +1,66 @@
+<?xml version='1.0' encoding='UTF-8'?>
+
+<!-- 
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ -->
+
+<!-- 
+  Simple transform of Solr query results to RSS
+ -->
+
+<xsl:stylesheet version='1.0'
+    xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>
+
+  <xsl:output
+       method="xml"
+       encoding="utf-8"
+       media-type="text/xml; charset=UTF-8"
+  />
+  <xsl:template match='/'>
+    <rss version="2.0">
+       <channel>
+	 <title>Example Solr RSS 2.0 Feed</title>
+         <link>http://localhost:8983/solr</link>
+         <description>
+          This has been formatted by the sample "example_rss.xsl" transform -
+          use your own XSLT to get a nicer RSS feed.
+         </description>
+         <language>en-us</language>
+         <docs>http://localhost:8983/solr</docs>
+         <xsl:apply-templates select="response/result/doc"/>
+       </channel>
+    </rss>
+  </xsl:template>
+  
+  <!-- One RSS item per Solr result doc. URL text is wrapped in xsl:text so indentation is not copied into link/guid. -->
+  <xsl:template match="doc">
+    <xsl:variable name="id" select="str[@name='id']"/>
+    <xsl:variable name="timestamp" select="date[@name='timestamp']"/>
+    <item>
+      <title><xsl:value-of select="str[@name='name']"/></title>
+      <link>
+        <xsl:text>http://localhost:8983/solr/select?q=id:</xsl:text><xsl:value-of select="$id"/>
+      </link>
+      <description>
+        <xsl:value-of select="arr[@name='features']"/>
+      </description>
+      <pubDate><xsl:value-of select="$timestamp"/></pubDate>
+      <guid>
+        <xsl:text>http://localhost:8983/solr/select?q=id:</xsl:text><xsl:value-of select="$id"/>
+      </guid>
+    </item>
+  </xsl:template>
+</xsl:stylesheet>

Propchange: incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_rss.xsl
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/luke.xsl
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/luke.xsl?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/luke.xsl (added)
+++ incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/luke.xsl Fri Apr 29 09:20:31 2011
@@ -0,0 +1,337 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+    
+    http://www.apache.org/licenses/LICENSE-2.0
+    
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+
+
+<!-- 
+  Display the luke request handler with graphs
+ -->
+<xsl:stylesheet
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns="http://www.w3.org/1999/xhtml"
+    version="1.0"
+    >
+    <xsl:output
+        method="html"
+        encoding="UTF-8"
+        media-type="text/html; charset=UTF-8"
+        doctype-public="-//W3C//DTD XHTML 1.0 Strict//EN"
+        doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
+    />
+
+    <xsl:variable name="title">Solr Luke Request Handler Response</xsl:variable>
+
+    <xsl:template match="/">
+        <html xmlns="http://www.w3.org/1999/xhtml">
+            <head>
+                <link rel="stylesheet" type="text/css" href="solr-admin.css"/>
+                <link rel="icon" href="favicon.ico" type="image/ico"/>
+                <link rel="shortcut icon" href="favicon.ico" type="image/ico"/>
+                <title>
+                    <xsl:value-of select="$title"/>
+                </title>
+                <xsl:call-template name="css"/>
+
+            </head>
+            <body>
+                <h1>
+                    <xsl:value-of select="$title"/>
+                </h1>
+                <div class="doc">
+                    <ul>
+                        <xsl:if test="response/lst[@name='index']">
+                            <li>
+                                <a href="#index">Index Statistics</a>
+                            </li>
+                        </xsl:if>
+                        <xsl:if test="response/lst[@name='fields']">
+                            <li>
+                                <a href="#fields">Field Statistics</a>
+                                <ul>
+                                    <xsl:for-each select="response/lst[@name='fields']/lst">
+                                        <li>
+                                            <a href="#{@name}">
+                                                <xsl:value-of select="@name"/>
+                                            </a>
+                                        </li>
+                                    </xsl:for-each>
+                                </ul>
+                            </li>
+                        </xsl:if>
+                        <xsl:if test="response/lst[@name='doc']">
+                            <li>
+                                <a href="#doc">Document statistics</a>
+                            </li>
+                        </xsl:if>
+                    </ul>
+                </div>
+                <xsl:if test="response/lst[@name='index']">
+                    <h2><a name="index"/>Index Statistics</h2>
+                    <xsl:apply-templates select="response/lst[@name='index']"/>
+                </xsl:if>
+                <xsl:if test="response/lst[@name='fields']">
+                    <h2><a name="fields"/>Field Statistics</h2>
+                    <xsl:apply-templates select="response/lst[@name='fields']"/>
+                </xsl:if>
+                <xsl:if test="response/lst[@name='doc']">
+                    <h2><a name="doc"/>Document statistics</h2>
+                    <xsl:apply-templates select="response/lst[@name='doc']"/>
+                </xsl:if>
+            </body>
+        </html>
+    </xsl:template>
+
+    <xsl:template match="lst">
+        <xsl:if test="parent::lst">
+            <tr>
+                <td colspan="2">
+                    <div class="doc">
+                        <xsl:call-template name="list"/>
+                    </div>
+                </td>
+            </tr>
+        </xsl:if>
+        <xsl:if test="not(parent::lst)">
+            <div class="doc">
+                <xsl:call-template name="list"/>
+            </div>
+        </xsl:if>
+    </xsl:template>
+
+    <xsl:template name="list">
+        <xsl:if test="count(child::*)>0">
+            <table>
+                <thead>
+                    <tr>
+                        <th colspan="2">
+                            <p>
+                                <a name="{@name}"/>
+                            </p>
+                            <xsl:value-of select="@name"/>
+                        </th>
+                    </tr>
+                </thead>
+                <tbody>
+                    <xsl:choose>
+                        <xsl:when
+                            test="@name='histogram'">
+                            <tr>
+                                <td colspan="2">
+                                    <xsl:call-template name="histogram"/>
+                                </td>
+                            </tr>
+                        </xsl:when>
+                        <xsl:otherwise>
+                            <xsl:apply-templates/>
+                        </xsl:otherwise>
+                    </xsl:choose>
+                </tbody>
+            </table>
+        </xsl:if>
+    </xsl:template>
+
+    <xsl:template name="histogram">
+        <div class="doc">
+            <xsl:call-template name="barchart">
+                <xsl:with-param name="max_bar_width">50</xsl:with-param>
+                <xsl:with-param name="iwidth">800</xsl:with-param>
+                <xsl:with-param name="iheight">160</xsl:with-param>
+                <xsl:with-param name="fill">blue</xsl:with-param>
+            </xsl:call-template>
+        </div>
+    </xsl:template>
+
+    <xsl:template name="barchart">
+        <xsl:param name="max_bar_width"/>
+        <xsl:param name="iwidth"/>
+        <xsl:param name="iheight"/>
+        <xsl:param name="fill"/>
+        <xsl:variable name="max">
+            <xsl:for-each select="int">
+                <xsl:sort data-type="number" order="descending"/>
+                <xsl:if test="position()=1">
+                    <xsl:value-of select="."/>
+                </xsl:if>
+            </xsl:for-each>
+        </xsl:variable>
+        <xsl:variable name="bars">
+           <xsl:value-of select="count(int)"/>
+        </xsl:variable>
+        <xsl:variable name="bar_width">
+           <xsl:choose>
+             <xsl:when test="$max_bar_width &lt; ($iwidth div $bars)">
+               <xsl:value-of select="$max_bar_width"/>
+             </xsl:when>
+             <xsl:otherwise>
+               <xsl:value-of select="$iwidth div $bars"/>
+             </xsl:otherwise>
+           </xsl:choose>
+        </xsl:variable>
+        <table class="histogram">
+           <tbody>
+              <tr>
+                <xsl:for-each select="int">
+                   <td>
+                 <xsl:value-of select="."/>
+                 <div class="histogram">
+                  <xsl:attribute name="style">background-color: <xsl:value-of select="$fill"/>; width: <xsl:value-of select="$bar_width"/>px; height: <xsl:value-of select="($iheight*number(.)) div $max"/>px;</xsl:attribute>
+                 </div>
+                   </td> 
+                </xsl:for-each>
+              </tr>
+              <tr>
+                <xsl:for-each select="int">
+                   <td>
+                       <xsl:value-of select="@name"/>
+                   </td>
+                </xsl:for-each>
+              </tr>
+           </tbody>
+        </table>
+    </xsl:template>
+
+    <xsl:template name="keyvalue">
+        <xsl:choose>
+            <xsl:when test="@name">
+                <tr>
+                    <td class="name">
+                        <xsl:value-of select="@name"/>
+                    </td>
+                    <td class="value">
+                        <xsl:value-of select="."/>
+                    </td>
+                </tr>
+            </xsl:when>
+            <xsl:otherwise>
+                <xsl:value-of select="."/>
+            </xsl:otherwise>
+        </xsl:choose>
+    </xsl:template>
+
+    <xsl:template match="int|bool|long|float|double|uuid|date">
+        <xsl:call-template name="keyvalue"/>
+    </xsl:template>
+
+    <xsl:template match="arr">
+        <tr>
+            <td class="name">
+                <xsl:value-of select="@name"/>
+            </td>
+            <td class="value">
+                <ul>
+                    <xsl:for-each select="child::*">
+                        <li>
+                            <xsl:apply-templates/>
+                        </li>
+                    </xsl:for-each>
+                </ul>
+            </td>
+        </tr>
+    </xsl:template>
+
+    <xsl:template match="str">
+        <xsl:choose>
+            <xsl:when test="@name='schema' or @name='index' or @name='flags'">
+                <xsl:call-template name="schema"/>
+            </xsl:when>
+            <xsl:otherwise>
+                <xsl:call-template name="keyvalue"/>
+            </xsl:otherwise>
+        </xsl:choose>
+    </xsl:template>
+
+    <xsl:template name="schema">
+        <tr>
+            <td class="name">
+                <xsl:value-of select="@name"/>
+            </td>
+            <td class="value">
+                <xsl:if test="contains(.,'unstored')">
+                    <xsl:value-of select="."/>
+                </xsl:if>
+                <xsl:if test="not(contains(.,'unstored'))">
+                    <xsl:call-template name="infochar2string">
+                        <xsl:with-param name="charList">
+                            <xsl:value-of select="."/>
+                        </xsl:with-param>
+                    </xsl:call-template>
+                </xsl:if>
+            </td>
+        </tr>
+    </xsl:template>
+
+    <xsl:template name="infochar2string">
+        <xsl:param name="i">1</xsl:param>
+        <xsl:param name="charList"/>
+
+        <xsl:variable name="char">
+            <xsl:value-of select="substring($charList,$i,1)"/>
+        </xsl:variable>
+        <xsl:choose>
+            <xsl:when test="$char='I'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='I']"/> - </xsl:when>
+            <xsl:when test="$char='T'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='T']"/> - </xsl:when>
+            <xsl:when test="$char='S'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='S']"/> - </xsl:when>
+            <xsl:when test="$char='M'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='M']"/> - </xsl:when>
+            <xsl:when test="$char='V'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='V']"/> - </xsl:when>
+            <xsl:when test="$char='o'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='o']"/> - </xsl:when>
+            <xsl:when test="$char='p'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='p']"/> - </xsl:when>
+            <xsl:when test="$char='O'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='O']"/> - </xsl:when>
+            <xsl:when test="$char='L'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='L']"/> - </xsl:when>
+            <xsl:when test="$char='B'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='B']"/> - </xsl:when>
+            <xsl:when test="$char='C'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='C']"/> - </xsl:when>
+            <xsl:when test="$char='f'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='f']"/> - </xsl:when>
+            <xsl:when test="$char='l'">
+                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='l']"/> -
+            </xsl:when>
+        </xsl:choose>
+
+        <xsl:if test="not($i>=string-length($charList))">
+            <xsl:call-template name="infochar2string">
+                <xsl:with-param name="i">
+                    <xsl:value-of select="$i+1"/>
+                </xsl:with-param>
+                <xsl:with-param name="charList">
+                    <xsl:value-of select="$charList"/>
+                </xsl:with-param>
+            </xsl:call-template>
+        </xsl:if>
+    </xsl:template>
+    <xsl:template name="css">
+        <style type="text/css">
+            <![CDATA[
+            td.name {font-style: italic; font-size:80%; }
+            .doc { margin: 0.5em; border: solid grey 1px; }
+            .exp { display: none; font-family: monospace; white-space: pre; }
+            div.histogram { background: none repeat scroll 0%; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial;}
+            table.histogram { width: auto; vertical-align: bottom; }
+            table.histogram td, table.histogram th { text-align: center; vertical-align: bottom; border-bottom: 1px solid #ff9933; width: auto; }
+            ]]>
+        </style>
+    </xsl:template>
+</xsl:stylesheet>

Propchange: incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/luke.xsl
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/resources/testEntityScore.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/resources/testEntityScore.txt?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/resources/testEntityScore.txt (added)
+++ incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/resources/testEntityScore.txt Fri Apr 29 09:20:31 2011
@@ -0,0 +1,3 @@
+http://www.example.org/entity/test	100
+http://www.example.org/entity/test2	10
+http://www.example.org/entity/test3	1
\ No newline at end of file

Propchange: incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/resources/testEntityScore.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/stanbol/trunk/entityhub/indexing/genericrdf/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/genericrdf/pom.xml?rev=1097740&r1=1097739&r2=1097740&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/genericrdf/pom.xml (original)
+++ incubator/stanbol/trunk/entityhub/indexing/genericrdf/pom.xml Fri Apr 29 09:20:31 2011
@@ -22,9 +22,9 @@
 
   <parent>
     <groupId>org.apache.stanbol</groupId>
-    <artifactId>org.apache.stanbol.entityhub.parent</artifactId>
+    <artifactId>org.apache.stanbol.entityhub.indexing.parent</artifactId>
     <version>0.9-SNAPSHOT</version>
-    <relativePath>../../parent</relativePath>
+    <relativePath>../parent</relativePath>
   </parent>
 
   <groupId>org.apache.stanbol</groupId>
@@ -52,12 +52,22 @@
     <dependency>
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.entityhub.servicesapi</artifactId>
-      <version>${stanbol-version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.entityhub.core</artifactId>
-      <version>${stanbol-version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.entityhub.indexing.core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-compress</artifactId>
     </dependency>
     <dependency>
       <groupId>com.hp.hpl.jena</groupId>
@@ -75,10 +85,5 @@
       <artifactId>tdb</artifactId>
       <version>0.8.8</version>
     </dependency>
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-compress</artifactId>
-      <version>1.0</version>
-    </dependency>
   </dependencies>
 </project>

Modified: incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/java/org/apache/stanbol/entityhub/indexing/rdf/RdfIndexer.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/java/org/apache/stanbol/entityhub/indexing/rdf/RdfIndexer.java?rev=1097740&r1=1097739&r2=1097740&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/java/org/apache/stanbol/entityhub/indexing/rdf/RdfIndexer.java (original)
+++ incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/java/org/apache/stanbol/entityhub/indexing/rdf/RdfIndexer.java Fri Apr 29 09:20:31 2011
@@ -489,6 +489,7 @@ public class RdfIndexer {
 	 * from a single stream and therefore gives the OS the best opportunities to
 	 * optimise file access.
 	 * @throws YardException
+	 * @see #indexRanked()
 	 */
 	private void indexResources() throws YardException{
         StringBuilder qb = new StringBuilder();
@@ -888,48 +889,15 @@ public class RdfIndexer {
         }
         log.info(" < completed");
     }
-//------------------------------------------------------------------------------
-// Other implemented variants with less performance than indexResource3!
-//------------------------------------------------------------------------------
-//    private void indexResource2(Resource resource){
-//        Query q = QueryFactory.create(String.format(resourceQuery,resource.getURI(),resource.getURI()), Syntax.syntaxARQ);
-//        final ResultSet resultSet = QueryExecutionFactory.create(q, indexingDataset.toDataset()).execSelect();
-//        Representation source = vf.createRepresentation(resource.getURI());
-//        while(resultSet.hasNext()){
-//            QuerySolution solution =resultSet.next();
-//            RDFNode fieldNode = solution.get("field");
-//            if(fieldNode.isURIResource()){
-//                String field = fieldNode.asResource().getURI();
-//                RDFNode value = solution.get("value");
-//                if(value.isURIResource()){
-//                    source.addReference(field, value.asResource().getURI());
-//                } else if(value.isLiteral()){
-//                    Literal literal = value.asLiteral();
-//                    if(literal.getDatatype() != null){
-//                        Object literalValue;
-//                        try {
-//                            literalValue = literal.getValue();
-//                        } catch (DatatypeFormatException e) {
-//                            log.warn(" Unable to convert "+literal.getLexicalForm()+" to "+literal.getDatatype()+"-> use lecicalForm");
-//                            literalValue = literal.getLexicalForm();
-//                        }
-//                        if(literalValue instanceof BaseDatatype.TypedValue){
-//                            source.add(field, literal.getLexicalForm());
-//                        } else {
-//                            source.add(field, literal.getValue());
-//                        }
-//                    } else {
-//                        String lang = literal.getLanguage();
-//                        if(lang != null && lang.isEmpty()){
-//                            lang = null;
-//                        }
-//                        source.addNaturalText(field, literal.getLexicalForm(),lang);
-//                    }
-//                }
-//            }
-//        }
-//        //log.info("S<source Resource:\n"+ModelUtils.getRepresentationInfo(source));
-//    }
+
+    /**
+     * This indexing method uses the list of Entities to index as input, queries
+     * for the data and indexes them. This performs one query per entity and therefore
+     * does not provide the same read performance as {@link #indexResources()}.
+     * However, where only a small fraction of all entities is indexed, this
+     * method will be significantly faster.
+     * @throws YardException
+     */
     private void indexRanked() throws YardException {
         if(entityRankings == null){
             throw new IllegalStateException("Unable to index with Etity Ranking Mode if no Entity Rankings are present!");

Modified: incubator/stanbol/trunk/entityhub/indexing/geonames/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/geonames/pom.xml?rev=1097740&r1=1097739&r2=1097740&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/geonames/pom.xml (original)
+++ incubator/stanbol/trunk/entityhub/indexing/geonames/pom.xml Fri Apr 29 09:20:31 2011
@@ -19,7 +19,8 @@
          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 
   <modelVersion>4.0.0</modelVersion>
-  <!--
+<!-- Do not use a parent because of problems with missing dependencies with
+     mvn assembly:assembly
     <parent>
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.entityhub.parent</artifactId>

Added: incubator/stanbol/trunk/entityhub/indexing/parent/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/parent/pom.xml?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/parent/pom.xml (added)
+++ incubator/stanbol/trunk/entityhub/indexing/parent/pom.xml Fri Apr 29 09:20:31 2011
@@ -0,0 +1,71 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.stanbol</groupId>
+    <artifactId>org.apache.stanbol.entityhub.parent</artifactId>
+    <version>0.9-SNAPSHOT</version>
+    <relativePath>../../parent</relativePath>
+  </parent>
+
+  <groupId>org.apache.stanbol</groupId>
+  <artifactId>org.apache.stanbol.entityhub.indexing.parent</artifactId>
+  <packaging>pom</packaging>
+
+  <name>Apache Stanbol Entityhub Indexing parent POM</name>
+  <description>
+    Parent POM for the Apache Stanbol Entityhub Indexing component
+  </description>
+
+  <inceptionYear>2010</inceptionYear>
+
+  <scm> <!-- SCM location of this module: entityhub/indexing/parent -->
+    <connection>
+      scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/entityhub/indexing/parent
+    </connection>
+    <developerConnection>
+      scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/entityhub/indexing/parent
+    </developerConnection>
+    <url>http://incubator.apache.org/stanbol</url>
+  </scm>
+
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+
+  <build>
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <groupId>org.apache.felix</groupId>
+          <artifactId>maven-bundle-plugin</artifactId>
+          <inherited>true</inherited>
+          <configuration>
+            <instructions>
+              <Bundle-Category>Stanbol Entityhub Indexing</Bundle-Category>
+            </instructions>
+          </configuration>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+  </build>
+
+</project>

Propchange: incubator/stanbol/trunk/entityhub/indexing/parent/pom.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/pom.xml?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/pom.xml (added)
+++ incubator/stanbol/trunk/entityhub/indexing/pom.xml Fri Apr 29 09:20:31 2011
@@ -0,0 +1,63 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.stanbol</groupId>
+    <artifactId>org.apache.stanbol.entityhub.indexing.parent</artifactId>
+    <version>0.9-SNAPSHOT</version>
+    <relativePath>./parent</relativePath>
+  </parent>
+
+  <groupId>org.apache.stanbol</groupId>
+  <artifactId>org.apache.stanbol.entityhub.indexing.reactor</artifactId>
+  <packaging>pom</packaging>
+
+  <name>Apache Stanbol Entityhub Indexing reactor</name>
+  <description>
+    Pseudo project to build the complete Apache Stanbol Entityhub Indexing component
+  </description>
+
+  <inceptionYear>2010</inceptionYear>
+
+  <scm>
+    <connection>
+      scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/entityhub/indexing
+    </connection>
+    <developerConnection>
+      scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/entityhub/indexing
+    </developerConnection>
+    <url>http://incubator.apache.org/stanbol</url>
+  </scm>
+
+  <modules>
+    <module>parent</module>
+
+    <module>core</module>
+    <module>source/jenatdb</module>
+    <module>destination/solryard</module>
+    <!-- Utils for creating local caches (indexing utils) -->
+    <module>geonames</module>
+    <module>genericrdf</module>
+    <module>dbpedia</module>
+    <module>dblp</module>
+  </modules>
+</project>

Propchange: incubator/stanbol/trunk/entityhub/indexing/pom.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/pom.xml?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/pom.xml (added)
+++ incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/pom.xml Fri Apr 29 09:20:31 2011
@@ -0,0 +1,120 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.stanbol</groupId>
+    <artifactId>org.apache.stanbol.entityhub.parent</artifactId>
+    <version>0.9-SNAPSHOT</version>
+    <relativePath>../../../parent</relativePath>
+  </parent>
+
+  <groupId>org.apache.stanbol</groupId>
+  <artifactId>org.apache.stanbol.entityhub.indexing.source.jenatdb</artifactId>
+  <packaging>bundle</packaging>
+  <name>Apache Stanbol Entityhub IndexingSource for RDF using Jena TDB</name>
+  <description>
+    Provides support for indexing RDF data by using a Jena TDB triple store.
+    It supports using an existing triple store as well as creating a new one by
+    loading a provided list of RDF files.
+    This implementation is tested to work even for very large data sets such as
+    http://dbpedia.org dumps. 
+  </description>
+  <scm> <!-- SCM location of this module: entityhub/indexing/source/jenatdb -->
+    <connection>
+      scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/entityhub/indexing/source/jenatdb
+    </connection>
+    <developerConnection>
+      scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/entityhub/indexing/source/jenatdb
+    </developerConnection>
+    <url>http://incubator.apache.org/stanbol</url>
+  </scm>
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-bundle-plugin</artifactId>
+        <extensions>true</extensions>
+        <configuration>
+          <instructions>
+            <Export-Package>
+              org.apache.stanbol.entityhub.indexing.source.jenatdb;version=${project.version}
+            </Export-Package>
+          </instructions>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.entityhub.servicesapi</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.entityhub.core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.entityhub.indexing.core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-compress</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.hp.hpl.jena</groupId>
+      <artifactId>jena</artifactId>
+      <version>2.6.4</version>
+      <exclusions>
+        <exclusion>
+          <artifactId>slf4j-log4j12</artifactId>
+          <groupId>org.slf4j</groupId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>com.hp.hpl.jena</groupId>
+      <artifactId>tdb</artifactId>
+      <version>0.8.10</version>
+    </dependency>
+    <dependency>
+        <groupId>com.hp.hpl.jena</groupId>
+        <artifactId>arq</artifactId>
+        <version>2.8.8</version>
+    </dependency>
+    <!-- dependencies for testing -->
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-simple</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+</project>

Propchange: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/pom.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSource.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSource.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSource.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSource.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,559 @@
+package org.apache.stanbol.entityhub.indexing.source.jenatdb;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Set;
+
+import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
+import org.apache.stanbol.entityhub.indexing.core.EntityDataIterable;
+import org.apache.stanbol.entityhub.indexing.core.EntityDataIterator;
+import org.apache.stanbol.entityhub.indexing.core.EntityDataProvider;
+import org.apache.stanbol.entityhub.indexing.core.IndexingComponent;
+import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
+import org.apache.stanbol.entityhub.indexing.core.source.ResourceLoader;
+import org.apache.stanbol.entityhub.indexing.core.source.ResourceState;
+import org.apache.stanbol.entityhub.servicesapi.model.Reference;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.Text;
+import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.hp.hpl.jena.datatypes.BaseDatatype;
+import com.hp.hpl.jena.datatypes.DatatypeFormatException;
+import com.hp.hpl.jena.datatypes.RDFDatatype;
+import com.hp.hpl.jena.datatypes.xsd.XSDDateTime;
+import com.hp.hpl.jena.datatypes.xsd.XSDDuration;
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.graph.impl.LiteralLabel;
+import com.hp.hpl.jena.query.Query;
+import com.hp.hpl.jena.query.QueryExecutionFactory;
+import com.hp.hpl.jena.query.QueryFactory;
+import com.hp.hpl.jena.query.QuerySolution;
+import com.hp.hpl.jena.query.ResultSet;
+import com.hp.hpl.jena.query.Syntax;
+import com.hp.hpl.jena.rdf.model.RDFNode;
+import com.hp.hpl.jena.sparql.core.Var;
+import com.hp.hpl.jena.sparql.engine.binding.Binding;
+import com.hp.hpl.jena.tdb.TDBFactory;
+import com.hp.hpl.jena.tdb.base.file.Location;
+import com.hp.hpl.jena.tdb.store.DatasetGraphTDB;
+import com.hp.hpl.jena.util.iterator.ExtendedIterator;
+/**
+ * Implementation of an {@link IndexingComponent} for Entity data that provides 
+ * the possibility to both:<ol>
+ * <li>randomly access entity data via the {@link EntityDataProvider} interface
+ * <li>iterate over all entities in this store via the {@link EntityDataIterator}
+ * interface.
+ * </ol>
+ * 
+ * @author Rupert Westenthaler
+ *
+ */
+public class RdfIndexingSource implements EntityDataIterable,EntityDataProvider {
+    /**
+     * The Parameter used to configure the source folder(s) relative to the
+     * {@link IndexingConfig#getSourceFolder()}. The ',' (comma) is used as
+     * separator to configure multiple sources.
+     */
+    public static final String PARAM_SOURCE_FILE_OR_FOLDER = "source";
+    /**
+     * Parameter used to configure the name of the directory used to store the
+     * RDF model (a Jena TDB dataset). The default name is
+     * {@link #DEFAULT_MODEL_DIRECTORY}
+     */
+    public static final String PARAM_MODEL_DIRECTORY = "model";
+    /**
+     * The Parameter that can be used to deactivate the importing of sources.
+     * If this parameter is set to <code>false</code> the values configured for
+     * {@link #PARAM_SOURCE_FILE_OR_FOLDER} are ignored. The default value is
+     * <code>true</code>
+     */
+    public static final String PARAM_IMPORT_SOURCE = "import";
+    /**
+     * The default directory name used to search for RDF files to be imported
+     */
+    public static final String DEFAULT_SOURCE_FOLDER_NAME = "rdf";
+    /**
+     * The default name of the folder used to initialise the 
+     * {@link DatasetGraphTDB Jena TDB dataset}.
+     */
+    public static final String DEFAULT_MODEL_DIRECTORY = "tdb";
+    //protected to allow internal classes direct access (without hidden getter/
+    //setter added by the compiler that decrease performance)
+    protected final static Logger log = LoggerFactory.getLogger(RdfIndexingSource.class);
+    
+    /**
+     * The RDF data
+     */
+    private DatasetGraphTDB indexingDataset;
+    /**
+     * The valueFactory used to create {@link Representation}s, {@link Reference}s
+     * and {@link Text} instances.
+     */
+    private ValueFactory vf;
+    
+    private ResourceLoader loader;
+
+    /**
+     * Default Constructor relying on {@link #setConfiguration(Map)} being
+     * called afterwards to provide the configuration!
+     */
+    public RdfIndexingSource(){
+        this(null);
+    }
+    /**
+     * Internally used to initialise a {@link ValueFactory}
+     * @param valueFactory
+     */
+    private RdfIndexingSource(ValueFactory valueFactory){
+        if(valueFactory == null){
+            this.vf = InMemoryValueFactory.getInstance();
+        } else {
+            this.vf = valueFactory;
+        }
+    }
+    /**
+     * Constructs an instance based on the provided parameters.
+     * @param modelLocation the directory for the RDF model. MUST NOT be NULL,
+     * however the {@link File} does not need to exist.
+     * @param sourceFileOrDirectory the source file or directory containing the
+     * file(s) to import, or <code>null</code> if no RDF files need to be imported
+     * @param valueFactory the {@link ValueFactory} used to create instances
+     * or <code>null</code> to use the default implementation.
+     * @throws IllegalArgumentException if the model location is <code>null</code>
+     */
+    public RdfIndexingSource(File modelLocation, File sourceFileOrDirectory,
+                             ValueFactory valueFactory){
+        this(valueFactory); //initialise vf; without this getEntityData(..) fails with a NPE
+        if(modelLocation == null){
+            throw new IllegalArgumentException("The parsed model location MUST NOT be NULL!");
+        }
+        //init the store
+        this.indexingDataset = createRdfModel(modelLocation);
+        this.loader = createResourceLoader(sourceFileOrDirectory);
+    }
+    /**
+     * Initialises the TDB dataset and the resource loader from the parsed configuration
+     * and, unless importing is deactivated, registers the configured RDF sources.
+     */
+    @Override
+    public void setConfiguration(Map<String,Object> config) {
+        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
+        //first init the RDF Model (falls back to the default directory name if not configured)
+        Object value = config.get(PARAM_MODEL_DIRECTORY);
+        String modelDirectory = value == null ? DEFAULT_MODEL_DIRECTORY : value.toString();
+        File modelLocation = new File(indexingConfig.getSourceFolder(),modelDirectory);
+        this.indexingDataset = createRdfModel(modelLocation);
+        //second we need to check if we need to import RDF files to the RDF model
+        this.loader = createResourceLoader(null); //create the ResourceLoader
+        //check if importing is deactivated
+        boolean importSource = true; //default is true
+        value = config.get(PARAM_IMPORT_SOURCE);
+        if(value != null){
+            importSource = Boolean.parseBoolean(value.toString());
+        }
+        if(importSource){ // if we need to import ... check the source config
+            log.info("Importing RDF data from:");
+            value = config.get(PARAM_SOURCE_FILE_OR_FOLDER);
+            if(value == null){ //if not set use the default
+                value = DEFAULT_SOURCE_FOLDER_NAME;
+            }
+            for(String source : value.toString().split(",")){
+                File sourceFileOrDirectory = new File(indexingConfig.getSourceFolder(),source);
+                if(sourceFileOrDirectory.exists()){
+                    //register the configured source with the ResourceLoader
+                    this.loader.addResource(sourceFileOrDirectory);
+                } else { //one placeholder per argument, otherwise the folder is not logged
+                    log.warn("Unable to find RDF source {} within the indexing Source folder {}",source,indexingConfig.getSourceFolder());
+                }
+            }
+            if(log.isInfoEnabled()){
+                for(String registeredSource : loader.getResources(ResourceState.REGISTERED)){
+                    log.info(" > "+registeredSource);
+                }
+            }
+        } else { //name and value are parsed as arguments (not concatenated to the format)
+            log.info("Importing RDF data deactivated by parameter {}={}",PARAM_IMPORT_SOURCE,value);
+        }
+    }
+    /**
+     * @param sourceFileOrDirectory
+     */
+    private ResourceLoader createResourceLoader(File sourceFileOrDirectory) {
+        return loader = new ResourceLoader(
+            new RdfResourceImporter(indexingDataset),
+            true,sourceFileOrDirectory);
+    }
+    /**
+     * Creates the TDB dataset graph for the parsed directory (created if it does not exist).
+     */
+    private DatasetGraphTDB createRdfModel(File modelLocation) {
+        if(modelLocation.exists() && !modelLocation.isDirectory()){
+            throw new IllegalArgumentException("The configured RDF model directory "+
+                modelLocation+" exists but is not a Directory");
+        } else if(!modelLocation.exists()){
+            if(!modelLocation.mkdirs()){
+                throw new IllegalArgumentException("Unable to create the configured RDF model directory "+
+                    modelLocation+"!");
+            }
+        }
+        Location location = new Location(modelLocation.getAbsolutePath());
+        return TDBFactory.createDatasetGraph(location);
+    }
+    @Override
+    public boolean needsInitialisation() {
+        //if there are resources with the state REGISTERED we need an initialisation
+        return !loader.getResources(ResourceState.REGISTERED).isEmpty();
+    }
+    @Override
+    public void initialise(){
+        loader.loadResources();
+    }
+    @Override
+    public void close() {
+        loader = null;
+        indexingDataset.close();
+    }
+    @Override
+    public EntityDataIterator entityDataIterator() {
+        String enityVar = "s";
+        String fieldVar = "p";
+        String valueVar = "o";
+        StringBuilder qb = new StringBuilder();
+        qb.append(String.format("SELECT ?%s ?%s ?%s \n",
+            enityVar,fieldVar,valueVar)); //for the select
+        qb.append("{ \n");
+        qb.append(String.format("    ?%s ?%s ?%s . \n",
+            enityVar,fieldVar,valueVar)); //for the where
+        qb.append("} \n");
+        log.debug("EntityDataIterator Query: \n"+qb.toString());
+        Query q = QueryFactory.create(qb.toString(), Syntax.syntaxARQ);
+        return new RdfEntityIterator(
+            QueryExecutionFactory.create(q, indexingDataset.toDataset()).execSelect(),
+            enityVar,fieldVar,valueVar);
+    }
+
+    @Override
+    public Representation getEntityData(String id) {
+        Node resource = Node.createURI(id);
+        Representation source = vf.createRepresentation(id);
+        ExtendedIterator<Triple> outgoing = indexingDataset.getDefaultGraph().find(resource, null, null);
+        boolean found = outgoing.hasNext();
+        while(outgoing.hasNext()){ //iterate over the statements for that resource
+            Triple statement = outgoing.next();
+            Node predicate = statement.getPredicate();
+            if(predicate == null || !predicate.isURI()){
+                log.warn("Ignore field {} for resource {} because it is null or not an URI!",
+                    predicate,resource);
+            } else {
+                String field = predicate.getURI();
+                Node value = statement.getObject();
+                processValue(value, source, field);
+            } //end else predicate != null
+        } //end iteration over resource triple
+        if(found) {
+            return source;
+            //log.info("Resource: \n"+ModelUtils.getRepresentationInfo(source));
+        } else {
+            log.debug("No Statements found for Entity {}!",id);
+            return null;
+        }
+    }
+
+    /**
+     * Processes a {@link Node} and adds the according value to the parsed
+     * Representation.
+     * @param value The node to convert to a value for the Representation
+     * @param source the representation (MUST NOT be <code>null</code>)
+     * @param field the field (MUST NOT be <code>null</code>)
+     */
+    private void processValue(Node value, Representation source, String field) {
+        if(value == null){
+            log.warn("Encountered NULL value for field {} and entity {}",
+                    field,source.getId());
+        } else if(value.isURI()){ //add a reference
+            source.addReference(field, value.getURI());
+        } else if(value.isLiteral()){ //add a value or a text depending on the dataType
+            LiteralLabel ll = value.getLiteral();
+//            log.debug("LL: lexical {} | value {} | dataType {} | language {}",
+//                new Object[]{ll.getLexicalForm(),ll.getValue(),ll.getDatatype(),ll.language()});
+            //if the dataType == null, then we can expect a plain literal
+            RDFDatatype dataType = ll.getDatatype();
+            if(dataType != null){ //add a value
+                Object literalValue;
+                try {
+                    literalValue = ll.getValue();
+                    if(literalValue instanceof BaseDatatype.TypedValue){
+                        //used for unknown data types
+                        // -> in such cases just use the lexical value
+                        source.add(field, ((BaseDatatype.TypedValue)literalValue).lexicalValue);
+                    } else if(literalValue instanceof XSDDateTime) {
+                        source.add(field, ((XSDDateTime)literalValue).asCalendar().getTime()); //Entityhub uses the time
+                    } else if(literalValue instanceof XSDDuration) {
+                        source.add(field, literalValue.toString());
+                    } else {
+                        source.add(field, literalValue);
+                    }
+                } catch (DatatypeFormatException e) {
+                    log.warn(" Unable to convert {} to {} -> use lecicalForm",
+                        ll.getLexicalForm(),ll.getDatatype());
+                    source.add(field, ll.getLexicalForm()); //really add the fallback value
+                }
+            } else { //add a text
+                String language = ll.language();
+                if(language!=null && language.length()<1){
+                    language = null;
+                }
+                source.addNaturalText(field, ll.getLexicalForm(), language);
+            }
+            // an empty language ("") is converted to null (no language) above
+        } else {
+            if(value.isBlank()){
+                log.info("ignoreing blank node value {} for field {} and Resource {}!",
+                        new Object[]{value,field,source.getId()});
+            } else {
+                log.warn("ignoreing value {} for field {} and Resource {} because it is of an unsupported type!",
+                        new Object[]{value,field,source.getId()});
+            }
+        } //end different value node type
+    }
+    /**
+     * Implementation of the iterator over the entities stored in a
+     * {@link RdfIndexingSource}. This Iterator is based on query
+     * {@link ResultSet}. It uses the low level SPARQL API because this allows
+     * to use the same code to create values for Representations
+     * @author Rupert Westenthaler
+     *
+     */
+    public final class RdfEntityIterator implements EntityDataIterator {
+        /**
+         * The query variables representing the entity, its fields and their values
+         */
+        final Var entityVar;
+        final Var fieldVar;
+        final Var valueVar;
+        /**
+         * The result set containing all triples in the form of <code>
+         * "entity -&gt; field -&gt; value"</code>
+         */
+        private final ResultSet resultSet;
+        /**
+         * The {@link Node} representing the current entity or <code>null</code>
+         * if the iterator is newly created.<p>
+         * {@link Node#isURI()} is guaranteed to return <code>true</code> and
+         * {@link Node#getURI()} is guaranteed to return the id for the entity
+         */
+        private Node currentEntity = null;
+        /**
+         * The {@link Node} for the next Entity in the iteration or <code>null</code>
+         * in case there are no further or the iterator is newly created (in that
+         * case {@link #currentEntity} will be also <code>null</code>)<p>
+         * {@link Node#isURI()} is guaranteed to return <code>true</code> and
+         * {@link Node#getURI()} is guaranteed to return the id for the entity
+         */
+        private Node nextEntity = null;
+        /**
+         * The Representation of the current Element. Only available after a
+         * call to {@link #getRepresentation()}
+         */
+        private Representation currentRepresentation = null;
+        /**
+         * Holds all <code>field,value</code> pairs of the current Entity.
+         * Elements at even positions represent<code>fields</code> and elements 
+         * at uneven positions represent <code>values</code>.
+         */
+        private List<Node> data = new ArrayList<Node>();
+        /**
+         * The next (not consumed) solution of the query. 
+         */
+        private Binding nextBinding = null;
+        
+        /** Creates the iterator and reads ahead to the first binding of a URI entity.
+         * @throws IllegalArgumentException if the result set is <code>null</code>
+         * or does not provide one of the three parsed variables */
+        protected RdfEntityIterator(ResultSet resultSet, String entityVar,String fieldVar, String valueVar){
+            if(resultSet == null){
+                throw new IllegalArgumentException("The parsed ResultSet MUST NOT be NULL!");
+            }
+            //check if the ResultSet provides the required variables to perform the query
+            List<String> vars = resultSet.getResultVars();
+            if(!vars.contains(entityVar)){
+                throw new IllegalArgumentException("The parsed ResultSet is missing the required " +
+                        "Variable \""+entityVar+"\" representing the Entity!");
+            }
+            this.entityVar = Var.alloc(entityVar);
+            if(!vars.contains(fieldVar)){
+                throw new IllegalArgumentException("The parsed ResultSet is missing the required " +
+                        "Variable \""+fieldVar+"\" representing the Field of an Entity!");
+            }
+            this.fieldVar = Var.alloc(fieldVar);
+            if(!vars.contains(valueVar)){
+                throw new IllegalArgumentException("The parsed ResultSet is missing the required " +
+                        "Variable \""+valueVar+"\" representing the Value of a Field of an Entity!");
+            }
+            this.valueVar = Var.alloc(valueVar);
+            this.resultSet = resultSet;
+            //this will read until the first binding of the first Entity is found
+            initFirst(); 
+        }
+        private void initFirst(){
+            if(currentEntity == null && nextEntity == null){ //only for the first call
+                //consume binding until the first valid entity starts
+                while(nextEntity == null && resultSet.hasNext()){
+                    Binding firstValid = resultSet.nextBinding();
+                    Node entityNode = firstValid.get(entityVar);
+                    if(entityNode.isURI()){ //only uri nodes are valid
+                      //store it temporarily in nextBinding
+                        nextBinding = firstValid; 
+                        //store it as next (first) entity
+                        nextEntity = entityNode;
+                    } else {
+                        log.warn(String.format("Current Entity %s is not a URI Node -> ignored",entityNode));
+                    }
+                }
+            } else {
+                throw new IllegalStateException("This Mehtod MUST be only used for Initialisation!");
+            }
+        }
+        @Override
+        public void close() {
+            data.clear();
+            data = null;
+            currentEntity = null;
+            currentRepresentation = null;
+            //Looks like it is not possible to close a resultSet
+        }
+
+        /**
+         * Getter for the {@link Representation} of the current entity. The
+         * instance is created by {@link #createRepresentation()} on the first
+         * request and cached for subsequent calls.
+         * @return the Representation or <code>null</code> if there is no 
+         * current entity (e.g. after {@link #close()})
+         */
+        @Override
+        public Representation getRepresentation() {
+            //no current entity -> nothing to represent
+            if(currentEntity == null){
+                return null;
+            }
+            //lazily build the representation on the first access
+            if(currentRepresentation == null){
+                currentRepresentation = createRepresentation();
+            }
+            return currentRepresentation;
+        }
+
+        /**
+         * Checks if a further entity is available.<p>
+         * NOTE: This can not be decided by asking the {@link #resultSet},
+         * because the first binding of the next entity is already consumed
+         * while the data of the current entity are collected. If this binding
+         * is the last one of the result set, the result set is exhausted
+         * while there is still an entity to return. Therefore the look-ahead
+         * stored in {@link #nextEntity} is checked instead.
+         * @return <code>true</code> if {@link #next()} will return an entity
+         */
+        @Override
+        public boolean hasNext() {
+            return nextEntity != null;
+        }
+
+        /**
+         * Moves to the next entity (see {@link #getNext()}).
+         * @return the current entity as String
+         * @throws NoSuchElementException if there are no more entities
+         */
+        @Override
+        public String next() {
+            return getNext();
+        }
+
+        /**
+         * Not supported by this implementation.
+         * @throws UnsupportedOperationException always
+         */
+        @Override
+        public void remove() {
+            throw new UnsupportedOperationException(
+                "Removal of Entities is not supported by this Implementation!");
+        }
+        /**
+         * Makes {@link #nextEntity} the {@link #currentEntity} and iterates
+         * over all bindings of the {@link #resultSet} that have the current
+         * entity as value of the entity variable.
+         * NOTES: <ul>
+         * <li>This method also fills {@link #data} and sets 
+         * {@link #nextBinding} to the first binding of the next entity.</li>
+         * <li>Bindings with an entity value that is unbound or not an URI node
+         * are logged and skipped.</li>
+         * <li>The look-ahead for the first entity is initialised by
+         * {@link #initFirst()}.</li>
+         * </ul>
+         * @return the current entity ({@link Node#toString()})
+         * @throws NoSuchElementException if there are no more entities
+         */
+        private String getNext(){
+            //check for more elements (based on the look-ahead and not on the
+            //resultSet, because the last entity may have no unread bindings)
+            if(nextEntity == null){
+                throw new NoSuchElementException("No more Entities available");
+            }
+            //clean up data of the previous entity
+            this.data.clear(); //remove data of the previous entity
+            this.currentRepresentation = null; //and the representation
+            this.currentEntity = nextEntity; //set the nextEntity to the current
+
+            //and process the first binding already consumed from the resultSet
+            //by initFirst() or by calling this method for the previous Entity
+            if(nextBinding != null){
+                processSolution(nextBinding);
+            }
+            //now get all the other Solutions for the current entity
+            boolean next = false;
+            while(!next && resultSet.hasNext()){
+                Binding binding = resultSet.nextBinding();
+                Node entityNode = binding.get(entityVar);
+                //the variable may be unbound (null) within a solution
+                if(entityNode != null && entityNode.isURI()){
+                    if(!entityNode.equals(currentEntity)){
+                        //start of next Entity
+                        this.nextEntity = entityNode; //store the node for the next entity
+                        this.nextBinding = binding; //store the first binding of the next entity
+                        //we are done for this entity -> exit the loop
+                        next = true;
+                    } else {
+                        processSolution(binding);
+                    }
+                } else {
+                    log.warn(String.format("Current Entity %s is not a URI Node -> ignored",entityNode));
+                }
+            }
+            if(!next){ // exit the loop but still no new entity ... that means
+                nextEntity = null; //there are no more entities
+                nextBinding = null; // and there are also no more solutions
+            }
+            return currentEntity.toString();
+        }
+        /**
+         * Adds the field/value pair of the parsed {@link Binding} to 
+         * {@link #data}. The {@link Node} bound to {@link #fieldVar} is added
+         * first, directly followed by the {@link Node} bound to 
+         * {@link #valueVar}.<p>
+         * Nothing is added if the field is missing or not an URI (
+         * {@link Node#isURI()} returns <code>false</code>; logged as error)
+         * or if the value is missing (silently ignored).
+         * @param binding the binding to process
+         */
+        private void processSolution(Binding binding) {
+            Node fieldNode = binding.get(fieldVar);
+            if(fieldNode == null || !fieldNode.isURI()){
+                //This may only happen if the Query used to create the ResultSet
+                //containing this Solution does not link the variable
+                //VARIABLE_NAME_FIELD to properties.
+                log.error("Found Field {} for Entity {} that is not an URIResource",fieldNode,currentEntity);
+                return;
+            }
+            Node valueNode = binding.get(valueVar);
+            if(valueNode == null){
+                return; //no value for this field -> nothing to store
+            }
+            //add the pair (field first, value second)
+            data.add(fieldNode);
+            data.add(valueNode);
+        }
+        /**
+         * Builds the {@link Representation} for the current entity based on
+         * the field/value pairs collected in {@link #data}. Called by
+         * {@link #getRepresentation()} when no Representation was created
+         * for the current entity so far.
+         * @return the created Representation
+         */
+        private Representation createRepresentation() {
+            Representation result = vf.createRepresentation(currentEntity.toString());
+            //data holds alternating entries: the field node directly followed
+            //by the value node. So each pass of the loop consumes two elements.
+            for(Iterator<Node> pairs = data.iterator(); pairs.hasNext();){
+                String fieldUri = pairs.next().getURI();
+                Node valueNode = pairs.next();
+                processValue(valueNode, result, fieldUri);
+            }
+            return result;
+        }
+    }
+    
+}

Propchange: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSource.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfResourceImporter.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfResourceImporter.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfResourceImporter.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfResourceImporter.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,116 @@
+package org.apache.stanbol.entityhub.indexing.source.jenatdb;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
+import org.apache.commons.io.FilenameUtils;
+import org.apache.stanbol.entityhub.indexing.core.source.ResourceState;
+import org.apache.stanbol.entityhub.indexing.core.source.ResourceImporter;
+import org.openjena.riot.Lang;
+import org.openjena.riot.RiotReader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.ModelFactory;
+import com.hp.hpl.jena.tdb.TDBLoader;
+import com.hp.hpl.jena.tdb.store.DatasetGraphTDB;
+import com.hp.hpl.jena.tdb.store.bulkloader.BulkLoader;
+import com.hp.hpl.jena.tdb.store.bulkloader.Destination;
+import com.hp.hpl.jena.tdb.store.bulkloader.LoadMonitor;
+import com.hp.hpl.jena.tdb.store.bulkloader.LoaderNodeTupleTable;
+
+/**
+ * {@link ResourceImporter} that loads RDF data into the 
+ * {@link DatasetGraphTDB} parsed to the constructor. GZIP ("gz") and
+ * BZip2 ("bz2") compressed resources are decompressed on the fly. The RDF
+ * format is guessed based on the name of the resource (after removing the
+ * extension of the compression).
+ */
+public class RdfResourceImporter implements ResourceImporter {
+
+    private static final Logger log = LoggerFactory.getLogger(RdfResourceImporter.class);
+    /**
+     * The dataset the imported triples are added to
+     */
+    private final DatasetGraphTDB indexingDataset;
+    /**
+     * Creates an importer for the parsed dataset
+     * @param indexingDataset the dataset to import the data to
+     * @throws IllegalArgumentException if the parsed dataset is <code>null</code>
+     */
+    public RdfResourceImporter(DatasetGraphTDB indexingDataset){
+        if(indexingDataset == null){
+            throw new IllegalArgumentException("The parsed DatasetGraphTDB instance MUST NOT be NULL!");
+        }
+        this.indexingDataset = indexingDataset;
+    }
+
+    /**
+     * Imports the RDF data provided by the parsed stream.
+     * @param is the stream to read the data from
+     * @param resourceName the name of the resource. Used to detect the
+     * compression and the RDF format.
+     * @return {@link ResourceState#IGNORED} if the RDF format can not be
+     * guessed based on the name, otherwise {@link ResourceState#LOADED}
+     * @throws IOException on any error while reading from the stream
+     */
+    @Override
+    public ResourceState importResource(InputStream is, String resourceName) throws IOException {
+        String name = FilenameUtils.getName(resourceName);
+        if ("gz".equalsIgnoreCase(FilenameUtils.getExtension(name))) {
+            is = new GZIPInputStream(is);
+            name = FilenameUtils.removeExtension(name);
+            log.debug("   - from GZIP Archive");
+        } else if ("bz2".equalsIgnoreCase(FilenameUtils.getExtension(name))) {
+            is = new BZip2CompressorInputStream(is);
+            name = FilenameUtils.removeExtension(name);
+            log.debug("   - from BZip2 Archive");
+        }// TODO: No Zip Files inside Zip Files supported :o( ^^
+        Lang format = Lang.guess(name);
+        // For N-Triple we can use the TDBLoader
+        if (format == null) {
+            //NOTE: the name needs to be parsed as argument for the placeholder
+            log.warn("ignore File {} because of unknown extension ", resourceName);
+            return ResourceState.IGNORED;
+        } else if (format == Lang.NTRIPLES) {
+            TDBLoader.load(indexingDataset, is, true);
+        } else if (format != Lang.RDFXML) {
+            // use RIOT to parse the format but with a special configuration
+            // RiotReader!
+            Destination<Triple> dest = createDestination();
+            dest.start();
+            RiotReader.parseTriples(is, format, null, dest);
+            dest.finish();
+        } else { // RDFXML
+            // in that case we need to use ARP
+            Model model = ModelFactory.createModelForGraph(indexingDataset.getDefaultGraph());
+            model.read(is, null);
+        }
+        return ResourceState.LOADED;
+    }
+    /**
+     * Creates a triple destination that loads the triples into the triple
+     * table of the {@link #indexingDataset}.
+     * This code is based on how Destinations are created in the {@link BulkLoader},
+     * implementation. Note that
+     * {@link BulkLoader#loadDefaultGraph(DatasetGraphTDB, InputStream, boolean)}
+     * can not be used for formats other than {@link Lang#NTRIPLES} because it
+     * hard codes this format for loading data from the parsed InputStream.
+     * @return the destination!
+     */
+    private Destination<Triple> createDestination() {
+        LoadMonitor monitor = new LoadMonitor(indexingDataset, 
+            log, "triples",50000,100000);
+        final LoaderNodeTupleTable loaderTriples = new LoaderNodeTupleTable(
+            indexingDataset.getTripleTable().getNodeTupleTable(), "triples", monitor) ;
+
+        Destination<Triple> sink = new Destination<Triple>() {
+            long count = 0 ;
+            public final void start()
+            {
+                loaderTriples.loadStart() ;
+                loaderTriples.loadDataStart() ;
+            }
+            public final void send(Triple triple)
+            {
+                loaderTriples.load(triple.getSubject(), triple.getPredicate(), 
+                    triple.getObject()) ;
+                count++ ;
+            }
+
+            public final void flush() { }
+            public void close() { }
+
+            public final void finish()
+            {
+                loaderTriples.loadDataFinish() ;
+                loaderTriples.loadIndexStart() ;
+                loaderTriples.loadIndexFinish() ;
+                loaderTriples.loadFinish() ;
+            }
+        } ;
+        return sink ;
+    }
+}

Propchange: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfResourceImporter.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSourceTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSourceTest.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSourceTest.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSourceTest.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,139 @@
+package org.apache.stanbol.entityhub.indexing.source.jenatdb;
+
+import java.util.Iterator;
+
+import org.apache.stanbol.entityhub.indexing.core.EntityDataIterable;
+import org.apache.stanbol.entityhub.indexing.core.EntityDataIterator;
+import org.apache.stanbol.entityhub.indexing.core.EntityDataProvider;
+import org.apache.stanbol.entityhub.indexing.core.EntityIterator;
+import org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore;
+import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
+import org.apache.stanbol.entityhub.servicesapi.model.Reference;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.Text;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import static org.junit.Assert.*;
+
+/**
+ * Tests the {@link RdfIndexingSource} in both roles it is configured for by
+ * the test configurations: as {@link EntityDataIterable} (config "iterable")
+ * and as {@link EntityDataProvider} (config "provider").
+ */
+public class RdfIndexingSourceTest {
+
+
+    private static final Logger log = LoggerFactory.getLogger(RdfIndexingSourceTest.class);
+    /**
+     * mvn copies the resources in "src/test/resources" to target/test-classes
+     */
+    private static final String TEST_CONFIGS_ROOT = "/target/test-classes/testConfigs/";
+
+    private static final String TEXT_TEST_FIELD = "http://www.geonames.org/ontology#alternateName";
+    private static final String VALUE_TEST_FIELD = "http://www.w3.org/2003/01/geo/wgs84_pos#lat";
+    private static final String REFERENCE_TEST_FIELD = "http://www.w3.org/2002/07/owl#sameAs";
+
+    private static final long NUMBER_OF_ENTITIES_EXPECTED = 3;
+
+    /**
+     * The path to the folder used as root for the tests
+     */
+    private static String testRoot;
+    /**
+     * Initialises the {@link #testRoot} based on the system property
+     * "basedir" or, if not present, "user.dir".
+     */
+    @BeforeClass
+    public static void init(){
+        //initialise based on basedir or user.dir
+        String baseDir = System.getProperty("basedir");
+        if(baseDir == null){
+            baseDir = System.getProperty("user.dir");
+        }
+        testRoot = baseDir+TEST_CONFIGS_ROOT;
+        log.info("Test Root ="+testRoot);
+    }
+    /**
+     * Iterates over all entities provided by the {@link EntityDataIterable}
+     * of the "iterable" configuration, validates the Representation of each
+     * one and checks the number of processed entities.
+     */
+    @Test
+    public void testEntityDataIterable(){
+        IndexingConfig config = new IndexingConfig(testRoot+"iterable");
+        EntityDataIterable iterable = config.getDataInterable();
+        assertNotNull(iterable);
+        //NOTE: JUnit expects the expected value as first parameter
+        assertEquals(RdfIndexingSource.class, iterable.getClass());
+        assertTrue(iterable.needsInitialisation());
+        iterable.initialise();
+        EntityDataIterator it = iterable.entityDataIterator();
+        long count = 0;
+        while(it.hasNext()){
+            String entity = it.next();
+            log.info("validate Entity "+entity);
+            assertNotNull(entity);
+            validateRepresentation(it.getRepresentation(), entity);
+            count++;
+        }
+        //check if all entities were found
+        assertEquals(String.format("%s Entities expected but %s processed!",
+            NUMBER_OF_ENTITIES_EXPECTED,count), 
+            NUMBER_OF_ENTITIES_EXPECTED, count);
+    }
+    /**
+     * Requests the data for all entities provided by the 
+     * {@link EntityIterator} of the "provider" configuration from the 
+     * {@link EntityDataProvider}, validates the Representation of each one
+     * and checks the number of processed entities.
+     */
+    @Test
+    public void testEntityDataProvider(){
+        IndexingConfig config = new IndexingConfig(testRoot+"provider");
+        EntityIterator entityIdIterator = config.getEntityIdIterator();
+        assertNotNull("Unable to perform test whithout EntityIterator",entityIdIterator);
+        EntityDataProvider dataProvider = config.getEntityDataProvider();
+        assertNotNull(dataProvider);
+        assertTrue(dataProvider.needsInitialisation());//there are test data to load
+        dataProvider.initialise();
+        //NOTE: JUnit expects the expected value as first parameter
+        assertEquals(RdfIndexingSource.class, dataProvider.getClass());
+        long count = 0;
+        while(entityIdIterator.hasNext()){
+            EntityScore entityScore = entityIdIterator.next();
+            assertNotNull(entityScore);
+            assertNotNull(entityScore.id);
+            validateRepresentation(dataProvider.getEntityData(entityScore.id),
+                entityScore.id);
+            count++;
+        }
+        //check if all entities were found
+        assertEquals(String.format("%s Entities expected but %s processed!",
+            NUMBER_OF_ENTITIES_EXPECTED,count), 
+            NUMBER_OF_ENTITIES_EXPECTED, count);
+    }
+
+    /**
+     * Validates that the Representation is not <code>null</code>, uses the
+     * expected ID and provides valid texts and references for the test fields
+     * @param rep the Representation to validate
+     * @param id the expected ID of the Representation
+     */
+    private void validateRepresentation(Representation rep, String id) {
+        assertNotNull("Representation for Entity with ID "+id+" is null",rep);
+        assertEquals(id, rep.getId());
+        //check if multiple languages are parsed correctly
+        testText(rep);
+        //TODO: need to add XSD dataTypes to the test data
+        //testValue(rep, Double.class);
+        testReference(rep);
+    }
+    /**
+     * Checks that the Representation has at least one text for the 
+     * {@link #TEXT_TEST_FIELD} and that no text ends with "@{lang}"
+     * @param rep the Representation to check
+     */
+    private void testText(Representation rep){
+        Iterator<Text> values = rep.getText(TEXT_TEST_FIELD);
+        assertTrue(values.hasNext());
+        while(values.hasNext()){
+            Text text = values.next();
+            assertNotNull(text);
+            String lang = text.getLanguage();
+            //log.info(text.getText()+" | "+text.getLanguage()+" | "+text.getText().endsWith("@"+lang));
+            //this tests that the text does not contain the @{lang} as added by
+            //the toString method of the RDF Literal java class
+            assertFalse("Labels MUST NOT end with the Language! value="+text.getText(),
+                text.getText().endsWith("@"+lang));
+        }
+    }
+    /**
+     * Checks that the Representation has at least one non <code>null</code>
+     * value of the parsed type for the {@link #VALUE_TEST_FIELD}.<p>
+     * NOTE: currently not called, because the call in 
+     * {@link #validateRepresentation(Representation, String)} is commented out.
+     * @param rep the Representation to check
+     * @param type the requested type of the values
+     */
+    private <T> void testValue(Representation rep, Class<T> type){
+        Iterator<T> values = rep.get(VALUE_TEST_FIELD,type);
+        assertTrue(values.hasNext());
+        while(values.hasNext()){
+            T value = values.next();
+            assertNotNull(value);
+        }
+    }
+    /**
+     * Checks that the Representation has at least one non <code>null</code>
+     * reference for the {@link #REFERENCE_TEST_FIELD}
+     * @param rep the Representation to check
+     */
+    private void testReference(Representation rep){
+        Iterator<Reference> values = rep.getReferences(REFERENCE_TEST_FIELD);
+        assertTrue(values.hasNext());
+        while(values.hasNext()){
+            Reference ref = values.next();
+            assertNotNull(ref);
+        }
+    }
+}

Propchange: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSourceTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexFieldConfig.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexFieldConfig.txt?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexFieldConfig.txt (added)
+++ incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexFieldConfig.txt Fri Apr 29 09:20:31 2011
@@ -0,0 +1 @@
+#This is the default config that would index everything

Propchange: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexFieldConfig.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexing.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexing.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexing.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexing.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1,9 @@
+name=simple
+description=Simple Configuration
+
+# use the RDF indexing source as EntityDataProvider
+entityDataProvider=org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource,source:testData
+
+# used in the tests to provide the IDs of the Entities in the test data
+entityIdIterator=org.apache.stanbol.entityhub.indexing.core.source.LineBasedEntityIterator,source:testEntityIds.txt,charset:UTF-8,encodeIds:false
+

Propchange: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexing.properties
------------------------------------------------------------------------------
    svn:mime-type = text/plain