You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2011/04/29 11:20:38 UTC
svn commit: r1097740 [7/10] - in /incubator/stanbol/trunk: entityhub/
entityhub/generic/core/src/main/java/org/apache/stanbol/entityhub/core/mapping/
entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/mapping/
entityhu...
Added: incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_atom.xsl
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_atom.xsl?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_atom.xsl (added)
+++ incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_atom.xsl Fri Apr 29 09:20:31 2011
@@ -0,0 +1,67 @@
+<?xml version='1.0' encoding='UTF-8'?>
+
+<!--
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ -->
+
+<!--
+ Simple transform of Solr query results to Atom
+ -->
+
+<xsl:stylesheet version='1.0'
+ xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>
+
+ <xsl:output
+ method="xml"
+ encoding="utf-8"
+ media-type="text/xml; charset=UTF-8"
+ />
+
+ <xsl:template match='/'>
+ <xsl:variable name="query" select="response/lst[@name='responseHeader']/lst[@name='params']/str[@name='q']"/>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <title>Example Solr Atom 1.0 Feed</title>
+ <subtitle>
+ This has been formatted by the sample "example_atom.xsl" transform -
+ use your own XSLT to get a nicer Atom feed.
+ </subtitle>
+ <author>
+ <name>Apache Solr</name>
+ <email>solr-user@lucene.apache.org</email>
+ </author>
+ <link rel="self" type="application/atom+xml"
+ href="http://localhost:8983/solr/q={$query}&wt=xslt&tr=atom.xsl"/>
+ <updated>
+ <xsl:value-of select="response/result/doc[position()=1]/date[@name='timestamp']"/>
+ </updated>
+ <id>tag:localhost,2007:example</id>
+ <xsl:apply-templates select="response/result/doc"/>
+ </feed>
+ </xsl:template>
+
+ <!-- search results xslt -->
+ <xsl:template match="doc">
+ <xsl:variable name="id" select="str[@name='id']"/>
+ <entry>
+ <title><xsl:value-of select="str[@name='name']"/></title>
+ <link href="http://localhost:8983/solr/select?q={$id}"/>
+ <id>tag:localhost,2007:<xsl:value-of select="$id"/></id>
+ <summary><xsl:value-of select="arr[@name='features']"/></summary>
+ <updated><xsl:value-of select="date[@name='timestamp']"/></updated>
+ </entry>
+ </xsl:template>
+
+</xsl:stylesheet>
Propchange: incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_atom.xsl
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_rss.xsl
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_rss.xsl?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_rss.xsl (added)
+++ incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_rss.xsl Fri Apr 29 09:20:31 2011
@@ -0,0 +1,66 @@
+<?xml version='1.0' encoding='UTF-8'?>
+
+<!--
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ -->
+
+<!--
+ Simple transform of Solr query results to RSS
+ -->
+
+<xsl:stylesheet version='1.0'
+ xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>
+
+ <xsl:output
+ method="xml"
+ encoding="utf-8"
+ media-type="text/xml; charset=UTF-8"
+ />
+ <xsl:template match='/'>
+ <rss version="2.0">
+ <channel>
+ <title>Example Solr RSS 2.0 Feed</title>
+ <link>http://localhost:8983/solr</link>
+ <description>
+ This has been formatted by the sample "example_rss.xsl" transform -
+ use your own XSLT to get a nicer RSS feed.
+ </description>
+ <language>en-us</language>
+ <docs>http://localhost:8983/solr</docs>
+ <xsl:apply-templates select="response/result/doc"/>
+ </channel>
+ </rss>
+ </xsl:template>
+
+ <!-- search results xslt -->
+ <xsl:template match="doc">
+ <xsl:variable name="id" select="str[@name='id']"/>
+ <xsl:variable name="timestamp" select="date[@name='timestamp']"/>
+ <item>
+ <title><xsl:value-of select="str[@name='name']"/></title>
+ <link>
+ http://localhost:8983/solr/select?q=id:<xsl:value-of select="$id"/>
+ </link>
+ <description>
+ <xsl:value-of select="arr[@name='features']"/>
+ </description>
+ <pubDate><xsl:value-of select="$timestamp"/></pubDate>
+ <guid>
+ http://localhost:8983/solr/select?q=id:<xsl:value-of select="$id"/>
+ </guid>
+ </item>
+ </xsl:template>
+</xsl:stylesheet>
Propchange: incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/example_rss.xsl
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/luke.xsl
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/luke.xsl?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/luke.xsl (added)
+++ incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/luke.xsl Fri Apr 29 09:20:31 2011
@@ -0,0 +1,337 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+
+<!--
+ Display the luke request handler with graphs
+ -->
+<xsl:stylesheet
+ xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ xmlns="http://www.w3.org/1999/xhtml"
+ version="1.0"
+ >
+ <xsl:output
+ method="html"
+ encoding="UTF-8"
+ media-type="text/html; charset=UTF-8"
+ doctype-public="-//W3C//DTD XHTML 1.0 Strict//EN"
+ doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
+ />
+
+ <xsl:variable name="title">Solr Luke Request Handler Response</xsl:variable>
+
+ <xsl:template match="/">
+ <html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <link rel="stylesheet" type="text/css" href="solr-admin.css"/>
+ <link rel="icon" href="favicon.ico" type="image/ico"/>
+ <link rel="shortcut icon" href="favicon.ico" type="image/ico"/>
+ <title>
+ <xsl:value-of select="$title"/>
+ </title>
+ <xsl:call-template name="css"/>
+
+ </head>
+ <body>
+ <h1>
+ <xsl:value-of select="$title"/>
+ </h1>
+ <div class="doc">
+ <ul>
+ <xsl:if test="response/lst[@name='index']">
+ <li>
+ <a href="#index">Index Statistics</a>
+ </li>
+ </xsl:if>
+ <xsl:if test="response/lst[@name='fields']">
+ <li>
+ <a href="#fields">Field Statistics</a>
+ <ul>
+ <xsl:for-each select="response/lst[@name='fields']/lst">
+ <li>
+ <a href="#{@name}">
+ <xsl:value-of select="@name"/>
+ </a>
+ </li>
+ </xsl:for-each>
+ </ul>
+ </li>
+ </xsl:if>
+ <xsl:if test="response/lst[@name='doc']">
+ <li>
+ <a href="#doc">Document statistics</a>
+ </li>
+ </xsl:if>
+ </ul>
+ </div>
+ <xsl:if test="response/lst[@name='index']">
+ <h2><a name="index"/>Index Statistics</h2>
+ <xsl:apply-templates select="response/lst[@name='index']"/>
+ </xsl:if>
+ <xsl:if test="response/lst[@name='fields']">
+ <h2><a name="fields"/>Field Statistics</h2>
+ <xsl:apply-templates select="response/lst[@name='fields']"/>
+ </xsl:if>
+ <xsl:if test="response/lst[@name='doc']">
+ <h2><a name="doc"/>Document statistics</h2>
+ <xsl:apply-templates select="response/lst[@name='doc']"/>
+ </xsl:if>
+ </body>
+ </html>
+ </xsl:template>
+
+ <xsl:template match="lst">
+ <xsl:if test="parent::lst">
+ <tr>
+ <td colspan="2">
+ <div class="doc">
+ <xsl:call-template name="list"/>
+ </div>
+ </td>
+ </tr>
+ </xsl:if>
+ <xsl:if test="not(parent::lst)">
+ <div class="doc">
+ <xsl:call-template name="list"/>
+ </div>
+ </xsl:if>
+ </xsl:template>
+
+ <xsl:template name="list">
+ <xsl:if test="count(child::*)>0">
+ <table>
+ <thead>
+ <tr>
+ <th colspan="2">
+ <p>
+ <a name="{@name}"/>
+ </p>
+ <xsl:value-of select="@name"/>
+ </th>
+ </tr>
+ </thead>
+ <tbody>
+ <xsl:choose>
+ <xsl:when
+ test="@name='histogram'">
+ <tr>
+ <td colspan="2">
+ <xsl:call-template name="histogram"/>
+ </td>
+ </tr>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:apply-templates/>
+ </xsl:otherwise>
+ </xsl:choose>
+ </tbody>
+ </table>
+ </xsl:if>
+ </xsl:template>
+
+ <xsl:template name="histogram">
+ <div class="doc">
+ <xsl:call-template name="barchart">
+ <xsl:with-param name="max_bar_width">50</xsl:with-param>
+ <xsl:with-param name="iwidth">800</xsl:with-param>
+ <xsl:with-param name="iheight">160</xsl:with-param>
+ <xsl:with-param name="fill">blue</xsl:with-param>
+ </xsl:call-template>
+ </div>
+ </xsl:template>
+
+ <xsl:template name="barchart">
+ <xsl:param name="max_bar_width"/>
+ <xsl:param name="iwidth"/>
+ <xsl:param name="iheight"/>
+ <xsl:param name="fill"/>
+ <xsl:variable name="max">
+ <xsl:for-each select="int">
+ <xsl:sort data-type="number" order="descending"/>
+ <xsl:if test="position()=1">
+ <xsl:value-of select="."/>
+ </xsl:if>
+ </xsl:for-each>
+ </xsl:variable>
+ <xsl:variable name="bars">
+ <xsl:value-of select="count(int)"/>
+ </xsl:variable>
+ <xsl:variable name="bar_width">
+ <xsl:choose>
+ <xsl:when test="$max_bar_width < ($iwidth div $bars)">
+ <xsl:value-of select="$max_bar_width"/>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:value-of select="$iwidth div $bars"/>
+ </xsl:otherwise>
+ </xsl:choose>
+ </xsl:variable>
+ <table class="histogram">
+ <tbody>
+ <tr>
+ <xsl:for-each select="int">
+ <td>
+ <xsl:value-of select="."/>
+ <div class="histogram">
+ <xsl:attribute name="style">background-color: <xsl:value-of select="$fill"/>; width: <xsl:value-of select="$bar_width"/>px; height: <xsl:value-of select="($iheight*number(.)) div $max"/>px;</xsl:attribute>
+ </div>
+ </td>
+ </xsl:for-each>
+ </tr>
+ <tr>
+ <xsl:for-each select="int">
+ <td>
+ <xsl:value-of select="@name"/>
+ </td>
+ </xsl:for-each>
+ </tr>
+ </tbody>
+ </table>
+ </xsl:template>
+
+ <xsl:template name="keyvalue">
+ <xsl:choose>
+ <xsl:when test="@name">
+ <tr>
+ <td class="name">
+ <xsl:value-of select="@name"/>
+ </td>
+ <td class="value">
+ <xsl:value-of select="."/>
+ </td>
+ </tr>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:value-of select="."/>
+ </xsl:otherwise>
+ </xsl:choose>
+ </xsl:template>
+
+ <xsl:template match="int|bool|long|float|double|uuid|date">
+ <xsl:call-template name="keyvalue"/>
+ </xsl:template>
+
+ <xsl:template match="arr">
+ <tr>
+ <td class="name">
+ <xsl:value-of select="@name"/>
+ </td>
+ <td class="value">
+ <ul>
+ <xsl:for-each select="child::*">
+ <li>
+ <xsl:apply-templates/>
+ </li>
+ </xsl:for-each>
+ </ul>
+ </td>
+ </tr>
+ </xsl:template>
+
+ <xsl:template match="str">
+ <xsl:choose>
+ <xsl:when test="@name='schema' or @name='index' or @name='flags'">
+ <xsl:call-template name="schema"/>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:call-template name="keyvalue"/>
+ </xsl:otherwise>
+ </xsl:choose>
+ </xsl:template>
+
+ <xsl:template name="schema">
+ <tr>
+ <td class="name">
+ <xsl:value-of select="@name"/>
+ </td>
+ <td class="value">
+ <xsl:if test="contains(.,'unstored')">
+ <xsl:value-of select="."/>
+ </xsl:if>
+ <xsl:if test="not(contains(.,'unstored'))">
+ <xsl:call-template name="infochar2string">
+ <xsl:with-param name="charList">
+ <xsl:value-of select="."/>
+ </xsl:with-param>
+ </xsl:call-template>
+ </xsl:if>
+ </td>
+ </tr>
+ </xsl:template>
+
+ <xsl:template name="infochar2string">
+ <xsl:param name="i">1</xsl:param>
+ <xsl:param name="charList"/>
+
+ <xsl:variable name="char">
+ <xsl:value-of select="substring($charList,$i,1)"/>
+ </xsl:variable>
+ <xsl:choose>
+ <xsl:when test="$char='I'">
+ <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='I']"/> - </xsl:when>
+ <xsl:when test="$char='T'">
+ <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='T']"/> - </xsl:when>
+ <xsl:when test="$char='S'">
+ <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='S']"/> - </xsl:when>
+ <xsl:when test="$char='M'">
+ <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='M']"/> - </xsl:when>
+ <xsl:when test="$char='V'">
+ <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='V']"/> - </xsl:when>
+ <xsl:when test="$char='o'">
+ <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='o']"/> - </xsl:when>
+ <xsl:when test="$char='p'">
+ <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='p']"/> - </xsl:when>
+ <xsl:when test="$char='O'">
+ <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='O']"/> - </xsl:when>
+ <xsl:when test="$char='L'">
+ <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='L']"/> - </xsl:when>
+ <xsl:when test="$char='B'">
+ <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='B']"/> - </xsl:when>
+ <xsl:when test="$char='C'">
+ <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='C']"/> - </xsl:when>
+ <xsl:when test="$char='f'">
+ <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='f']"/> - </xsl:when>
+ <xsl:when test="$char='l'">
+ <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='l']"/> -
+ </xsl:when>
+ </xsl:choose>
+
+ <xsl:if test="not($i>=string-length($charList))">
+ <xsl:call-template name="infochar2string">
+ <xsl:with-param name="i">
+ <xsl:value-of select="$i+1"/>
+ </xsl:with-param>
+ <xsl:with-param name="charList">
+ <xsl:value-of select="$charList"/>
+ </xsl:with-param>
+ </xsl:call-template>
+ </xsl:if>
+ </xsl:template>
+ <xsl:template name="css">
+ <style type="text/css">
+ <![CDATA[
+ td.name {font-style: italic; font-size:80%; }
+ .doc { margin: 0.5em; border: solid grey 1px; }
+ .exp { display: none; font-family: monospace; white-space: pre; }
+ div.histogram { background: none repeat scroll 0%; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial;}
+ table.histogram { width: auto; vertical-align: bottom; }
+ table.histogram td, table.histogram th { text-align: center; vertical-align: bottom; border-bottom: 1px solid #ff9933; width: auto; }
+ ]]>
+ </style>
+ </xsl:template>
+</xsl:stylesheet>
Propchange: incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/config/simple/conf/xslt/luke.xsl
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/resources/testEntityScore.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/resources/testEntityScore.txt?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/resources/testEntityScore.txt (added)
+++ incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/resources/testEntityScore.txt Fri Apr 29 09:20:31 2011
@@ -0,0 +1,3 @@
+http://www.example.org/entity/test 100
+http://www.example.org/entity/test2 10
+http://www.example.org/entity/test3 1
\ No newline at end of file
Propchange: incubator/stanbol/trunk/entityhub/indexing/destination/solryard/src/test/resources/testConfigs/withSolrConf/indexing/resources/testEntityScore.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: incubator/stanbol/trunk/entityhub/indexing/genericrdf/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/genericrdf/pom.xml?rev=1097740&r1=1097739&r2=1097740&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/genericrdf/pom.xml (original)
+++ incubator/stanbol/trunk/entityhub/indexing/genericrdf/pom.xml Fri Apr 29 09:20:31 2011
@@ -22,9 +22,9 @@
<parent>
<groupId>org.apache.stanbol</groupId>
- <artifactId>org.apache.stanbol.entityhub.parent</artifactId>
+ <artifactId>org.apache.stanbol.entityhub.indexing.parent</artifactId>
<version>0.9-SNAPSHOT</version>
- <relativePath>../../parent</relativePath>
+ <relativePath>../parent</relativePath>
</parent>
<groupId>org.apache.stanbol</groupId>
@@ -52,12 +52,22 @@
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.entityhub.servicesapi</artifactId>
- <version>${stanbol-version}</version>
</dependency>
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.entityhub.core</artifactId>
- <version>${stanbol-version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.entityhub.indexing.core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-compress</artifactId>
</dependency>
<dependency>
<groupId>com.hp.hpl.jena</groupId>
@@ -75,10 +85,5 @@
<artifactId>tdb</artifactId>
<version>0.8.8</version>
</dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-compress</artifactId>
- <version>1.0</version>
- </dependency>
</dependencies>
</project>
Modified: incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/java/org/apache/stanbol/entityhub/indexing/rdf/RdfIndexer.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/java/org/apache/stanbol/entityhub/indexing/rdf/RdfIndexer.java?rev=1097740&r1=1097739&r2=1097740&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/java/org/apache/stanbol/entityhub/indexing/rdf/RdfIndexer.java (original)
+++ incubator/stanbol/trunk/entityhub/indexing/genericrdf/src/main/java/org/apache/stanbol/entityhub/indexing/rdf/RdfIndexer.java Fri Apr 29 09:20:31 2011
@@ -489,6 +489,7 @@ public class RdfIndexer {
* from a single stream and therefore gives the OS the best opportunities to
* optimise file access.
* @throws YardException
+ * @see {@link #indexRanked()}
*/
private void indexResources() throws YardException{
StringBuilder qb = new StringBuilder();
@@ -888,48 +889,15 @@ public class RdfIndexer {
}
log.info(" < completed");
}
-//------------------------------------------------------------------------------
-// Other implemented variants with less performance than indexResource3!
-//------------------------------------------------------------------------------
-// private void indexResource2(Resource resource){
-// Query q = QueryFactory.create(String.format(resourceQuery,resource.getURI(),resource.getURI()), Syntax.syntaxARQ);
-// final ResultSet resultSet = QueryExecutionFactory.create(q, indexingDataset.toDataset()).execSelect();
-// Representation source = vf.createRepresentation(resource.getURI());
-// while(resultSet.hasNext()){
-// QuerySolution solution =resultSet.next();
-// RDFNode fieldNode = solution.get("field");
-// if(fieldNode.isURIResource()){
-// String field = fieldNode.asResource().getURI();
-// RDFNode value = solution.get("value");
-// if(value.isURIResource()){
-// source.addReference(field, value.asResource().getURI());
-// } else if(value.isLiteral()){
-// Literal literal = value.asLiteral();
-// if(literal.getDatatype() != null){
-// Object literalValue;
-// try {
-// literalValue = literal.getValue();
-// } catch (DatatypeFormatException e) {
-// log.warn(" Unable to convert "+literal.getLexicalForm()+" to "+literal.getDatatype()+"-> use lecicalForm");
-// literalValue = literal.getLexicalForm();
-// }
-// if(literalValue instanceof BaseDatatype.TypedValue){
-// source.add(field, literal.getLexicalForm());
-// } else {
-// source.add(field, literal.getValue());
-// }
-// } else {
-// String lang = literal.getLanguage();
-// if(lang != null && lang.isEmpty()){
-// lang = null;
-// }
-// source.addNaturalText(field, literal.getLexicalForm(),lang);
-// }
-// }
-// }
-// }
-// //log.info("S<source Resource:\n"+ModelUtils.getRepresentationInfo(source));
-// }
+
+ /**
+ * This indexing method uses the list of Entities to index as input, queries
+ * for the data and indexes them. This performs a query/entity and therefore
+ * does not provide the same read performance that {@link #indexResources()}.
+ * However where only a small amount of all entities are indexed, this
+ * method will be significant faster.
+ * @throws YardException
+ */
private void indexRanked() throws YardException {
if(entityRankings == null){
throw new IllegalStateException("Unable to index with Etity Ranking Mode if no Entity Rankings are present!");
Modified: incubator/stanbol/trunk/entityhub/indexing/geonames/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/geonames/pom.xml?rev=1097740&r1=1097739&r2=1097740&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/geonames/pom.xml (original)
+++ incubator/stanbol/trunk/entityhub/indexing/geonames/pom.xml Fri Apr 29 09:20:31 2011
@@ -19,7 +19,8 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
- <!--
+<!-- Do not use a parent because of problems with missing dependencies with
+ mvn assembly:assembly
<parent>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.entityhub.parent</artifactId>
Added: incubator/stanbol/trunk/entityhub/indexing/parent/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/parent/pom.xml?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/parent/pom.xml (added)
+++ incubator/stanbol/trunk/entityhub/indexing/parent/pom.xml Fri Apr 29 09:20:31 2011
@@ -0,0 +1,71 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.entityhub.parent</artifactId>
+ <version>0.9-SNAPSHOT</version>
+ <relativePath>../../parent</relativePath>
+ </parent>
+
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.entityhub.indexing.parent</artifactId>
+ <packaging>pom</packaging>
+
+ <name>Apache Stanbol Entityhub Indexingparent POM</name>
+ <description>
+ Parent POM for the Apache Stanbol Entityhub Indexing component
+ </description>
+
+ <inceptionYear>2010</inceptionYear>
+
+ <scm>
+ <connection>
+ scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/entityhub/parent
+ </connection>
+ <developerConnection>
+ scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/entityhub/parent
+ </developerConnection>
+ <url>http://incubator.apache.org/stanbol</url>
+ </scm>
+
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ </properties>
+
+ <build>
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <inherited>true</inherited>
+ <configuration>
+ <instructions>
+ <Bundle-Category>Stanbol Entityhub Indexing</Bundle-Category>
+ </instructions>
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+ </build>
+
+</project>
Propchange: incubator/stanbol/trunk/entityhub/indexing/parent/pom.xml
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/pom.xml?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/pom.xml (added)
+++ incubator/stanbol/trunk/entityhub/indexing/pom.xml Fri Apr 29 09:20:31 2011
@@ -0,0 +1,63 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.entityhub.indexing.parent</artifactId>
+ <version>0.9-SNAPSHOT</version>
+ <relativePath>./parent</relativePath>
+ </parent>
+
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.entityhub.indexing.reactor</artifactId>
+ <packaging>pom</packaging>
+
+ <name>Apache Stanbol Entityhub Indexing reactor</name>
+ <description>
+ Pseudo project to build the complete Apache Stanbol Entityhub Indexing component
+ </description>
+
+ <inceptionYear>2010</inceptionYear>
+
+ <scm>
+ <connection>
+ scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/entityhub/indexing
+ </connection>
+ <developerConnection>
+ scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/entityhub/indexing
+ </developerConnection>
+ <url>http://incubator.apache.org/stanbol</url>
+ </scm>
+
+ <modules>
+ <module>parent</module>
+
+ <module>core</module>
+ <module>source/jenatdb</module>
+ <module>destination/solryard</module>
+ <!-- Utils for createing local caches (indexing utils) -->
+ <module>geonames</module>
+ <module>genericrdf</module>
+ <module>dbpedia</module>
+ <module>dblp</module>
+ </modules>
+</project>
Propchange: incubator/stanbol/trunk/entityhub/indexing/pom.xml
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/pom.xml?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/pom.xml (added)
+++ incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/pom.xml Fri Apr 29 09:20:31 2011
@@ -0,0 +1,120 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.entityhub.parent</artifactId>
+ <version>0.9-SNAPSHOT</version>
+ <relativePath>../../../parent</relativePath>
+ </parent>
+
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.entityhub.indexing.source.jenatdb</artifactId>
+ <packaging>bundle</packaging>
+ <name>Apache Stanbol Entityhub IndexingSource for RDF using Jena TDB</name>
+ <description>
+ Provides support for indexing RDF data by using an Jena TDB triple store.
+ It supports to used an existing triple store and to create an new one by
+ loading a provided list of RDF Files.
+ This implementation is tested to work even for very large data sets such as
+ http://dbpedia.org dumps.
+ </description>
+ <scm>
+ <connection>
+ scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/entityhub/indexing/rdf/jenatdb
+ </connection>
+ <developerConnection>
+ scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/entityhub/indexing/rdf/jenatdb
+ </developerConnection>
+ <url>http://incubator.apache.org/stanbol</url>
+ </scm>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ <configuration>
+ <instructions>
+ <Export-Package>
+ org.apache.stanbol.entityhub.indexing.source.jenatdb;version=${project.version}
+ </Export-Package>
+ </instructions>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.entityhub.servicesapi</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.entityhub.core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.entityhub.indexing.core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-compress</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.hp.hpl.jena</groupId>
+ <artifactId>jena</artifactId>
+ <version>2.6.4</version>
+ <exclusions>
+ <exclusion>
+ <artifactId>slf4j-log4j12</artifactId>
+ <groupId>org.slf4j</groupId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>com.hp.hpl.jena</groupId>
+ <artifactId>tdb</artifactId>
+ <version>0.8.10</version>
+ </dependency>
+ <dependency>
+ <groupId>com.hp.hpl.jena</groupId>
+ <artifactId>arq</artifactId>
+ <version>2.8.8</version>
+ </dependency>
+ <!-- dependencies for testing -->
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-simple</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
Propchange: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/pom.xml
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSource.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSource.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSource.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSource.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,559 @@
+package org.apache.stanbol.entityhub.indexing.source.jenatdb;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Set;
+
+import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
+import org.apache.stanbol.entityhub.indexing.core.EntityDataIterable;
+import org.apache.stanbol.entityhub.indexing.core.EntityDataIterator;
+import org.apache.stanbol.entityhub.indexing.core.EntityDataProvider;
+import org.apache.stanbol.entityhub.indexing.core.IndexingComponent;
+import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
+import org.apache.stanbol.entityhub.indexing.core.source.ResourceLoader;
+import org.apache.stanbol.entityhub.indexing.core.source.ResourceState;
+import org.apache.stanbol.entityhub.servicesapi.model.Reference;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.Text;
+import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.hp.hpl.jena.datatypes.BaseDatatype;
+import com.hp.hpl.jena.datatypes.DatatypeFormatException;
+import com.hp.hpl.jena.datatypes.RDFDatatype;
+import com.hp.hpl.jena.datatypes.xsd.XSDDateTime;
+import com.hp.hpl.jena.datatypes.xsd.XSDDuration;
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.graph.impl.LiteralLabel;
+import com.hp.hpl.jena.query.Query;
+import com.hp.hpl.jena.query.QueryExecutionFactory;
+import com.hp.hpl.jena.query.QueryFactory;
+import com.hp.hpl.jena.query.QuerySolution;
+import com.hp.hpl.jena.query.ResultSet;
+import com.hp.hpl.jena.query.Syntax;
+import com.hp.hpl.jena.rdf.model.RDFNode;
+import com.hp.hpl.jena.sparql.core.Var;
+import com.hp.hpl.jena.sparql.engine.binding.Binding;
+import com.hp.hpl.jena.tdb.TDBFactory;
+import com.hp.hpl.jena.tdb.base.file.Location;
+import com.hp.hpl.jena.tdb.store.DatasetGraphTDB;
+import com.hp.hpl.jena.util.iterator.ExtendedIterator;
+/**
+ * Implementation of an {@link IndexingComponent} for Entity data that provides
+ * the possibility to both:<ol>
+ * <li>randomly access entity data via the {@link EntityDataProvider} interface
+ * <li>iterate over all entities in this store via the {@link EntityDataIterator}
+ * interface.
+ * </ol>
+ *
+ * @author Rupert Westenthaler
+ *
+ */
+public class RdfIndexingSource implements EntityDataIterable,EntityDataProvider {
+ /**
+ * The Parameter used to configure the source folder(s) relative to the
+ * {@link IndexingConfig#getSourceFolder()}. The ',' (comma) is used as
+ * separator to parsed multiple sources.
+ */
+ public static final String PARAM_SOURCE_FILE_OR_FOLDER = "source";
+ /**
+ * Parameter used to configure the name of the directory used to store the
+ * RDF model (a Jena TDB dataset). The default name is
+ * {@link #DEFAULT_MODEL_DIRECTORY}
+ */
+ public static final String PARAM_MODEL_DIRECTORY = "model";
+ /**
+ * The Parameter that can be used to deactivate the importing of sources.
+ * If this parameter is set to <code>false</code> the values configured for
+ * {@link #PARAM_IMPORT_SOURCE} are ignored. The default value is
+ * <code>true</code>
+ */
+ public static final String PARAM_IMPORT_SOURCE = "import";
+ /**
+ * The default directory name used to search for RDF files to be imported
+ */
+ public static final String DEFAULT_SOURCE_FOLDER_NAME = "rdf";
+ /**
+ * The default name of the folder used to initialise the
+ * {@link DatasetGraphTDB Jena TDB dataset}.
+ */
+ public static final String DEFAULT_MODEL_DIRECTORY = "tdb";
+ //protected to allow internal classes direct access (without hidden getter/
+ //setter added by the compiler that decrease performance)
+ protected final static Logger log = LoggerFactory.getLogger(RdfIndexingSource.class);
+
+ /**
+ * The RDF data
+ */
+ private DatasetGraphTDB indexingDataset;
+ /**
+ * The valueFactory used to create {@link Representation}s, {@link Reference}s
+ * and {@link Text} instances.
+ */
+ private ValueFactory vf;
+
+ private ResourceLoader loader;
+
+ /**
+ * Default Constructor relaying on that {@link #setConfiguration(Map)} is
+ * called afterwards to provide the configuration!
+ */
+ public RdfIndexingSource(){
+ this(null);
+ }
+ /**
+ * Internally used to initialise a {@link ValueFactory}
+ * @param valueFactory
+ */
+ private RdfIndexingSource(ValueFactory valueFactory){
+ if(valueFactory == null){
+ this.vf = InMemoryValueFactory.getInstance();
+ } else {
+ this.vf = valueFactory;
+ }
+ }
+ /**
+ * Constructs an instance based on the provided parameter
+ * @param modelLocation the directory for the RDF model. MUST NOT be NULL
+ * however the parsed {@link File} needs not to exist.
+ * @param sourceFileOrDirectory the source file or directory containing the
+ * file(s) to import. Parse <code>null</code> if no RDF files need to be
+ * imported
+ * @param valueFactory The {@link ValueFactory} used to create instances
+ * or <code>null</code> to use the default implementation.
+ */
+ public RdfIndexingSource(File modelLocation,
+ File sourceFileOrDirectory,
+ ValueFactory valueFactory){
+ if(modelLocation == null){
+ throw new IllegalArgumentException("The parsed model location MUST NOT be NULL!");
+ }
+ //init the store
+ this.indexingDataset = createRdfModel(modelLocation);
+ this.loader = createResourceLoader(sourceFileOrDirectory);
+ }
+ @Override
+ public void setConfiguration(Map<String,Object> config) {
+ IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
+ //first init the RDF Model
+ Object value = config.get(PARAM_MODEL_DIRECTORY);
+ File modelLocation;
+ if(value == null){
+ modelLocation = new File(indexingConfig.getSourceFolder(),DEFAULT_MODEL_DIRECTORY);
+ } else {
+ modelLocation = new File(indexingConfig.getSourceFolder(),value.toString());
+ }
+ this.indexingDataset = createRdfModel(modelLocation);
+ //second we need to check if we need to import RDF files to the RDF model
+ this.loader = createResourceLoader(null); //create the ResourceLoader
+ //check if importing is deactivated
+ boolean importSource = true; //default is true
+ value = config.get(PARAM_IMPORT_SOURCE);
+ if(value != null){
+ importSource = Boolean.parseBoolean(value.toString());
+ }
+ if(importSource){ // if we need to import ... check the source config
+ log.info("Importing RDF data from:");
+ value = config.get(PARAM_SOURCE_FILE_OR_FOLDER);
+ if(value == null){ //if not set use the default
+ value = DEFAULT_SOURCE_FOLDER_NAME;
+ }
+ for(String source : value.toString().split(",")){
+ File sourceFileOrDirectory = new File(indexingConfig.getSourceFolder(),source);
+ if(sourceFileOrDirectory.exists()){
+ //register the configured source with the ResourceLoader
+ this.loader.addResource(sourceFileOrDirectory);
+ } else {
+ log.warn("Unable to find RDF source {} within the indexing Source folder ",source,indexingConfig.getSourceFolder());
+ }
+ }
+ if(log.isInfoEnabled()){
+ for(String registeredSource : loader.getResources(ResourceState.REGISTERED)){
+ log.info(" > "+registeredSource);
+ }
+ }
+ } else {
+ log.info("Importing RDF data deactivated by parameer {}={}"+PARAM_IMPORT_SOURCE,value);
+ }
+ }
+ /**
+ * @param sourceFileOrDirectory
+ */
+ private ResourceLoader createResourceLoader(File sourceFileOrDirectory) {
+ return loader = new ResourceLoader(
+ new RdfResourceImporter(indexingDataset),
+ true,sourceFileOrDirectory);
+ }
+ /**
+ * @param modelLocation
+ */
+ private DatasetGraphTDB createRdfModel(File modelLocation) {
+ if(modelLocation.exists() && !modelLocation.isDirectory()){
+ throw new IllegalArgumentException("The configured RDF model directory "+
+ modelLocation+"exists but is not a Directory");
+ } else if(!modelLocation.exists()){
+ if(!modelLocation.mkdirs()){
+ throw new IllegalArgumentException("Unable to create the configured RDF model directory "+
+ modelLocation+"!");
+ }
+ }
+ Location location = new Location(modelLocation.getAbsolutePath());
+ return TDBFactory.createDatasetGraph(location);
+ }
+ @Override
+ public boolean needsInitialisation() {
+ //if there are resources with the state REGISTERED we need an initialisation
+ return !loader.getResources(ResourceState.REGISTERED).isEmpty();
+ }
+ @Override
+ public void initialise(){
+ loader.loadResources();
+ }
+ @Override
+ public void close() {
+ loader = null;
+ indexingDataset.close();
+ }
+ @Override
+ public EntityDataIterator entityDataIterator() {
+ String enityVar = "s";
+ String fieldVar = "p";
+ String valueVar = "o";
+ StringBuilder qb = new StringBuilder();
+ qb.append(String.format("SELECT ?%s ?%s ?%s \n",
+ enityVar,fieldVar,valueVar)); //for the select
+ qb.append("{ \n");
+ qb.append(String.format(" ?%s ?%s ?%s . \n",
+ enityVar,fieldVar,valueVar)); //for the where
+ qb.append("} \n");
+ log.debug("EntityDataIterator Query: \n"+qb.toString());
+ Query q = QueryFactory.create(qb.toString(), Syntax.syntaxARQ);
+ return new RdfEntityIterator(
+ QueryExecutionFactory.create(q, indexingDataset.toDataset()).execSelect(),
+ enityVar,fieldVar,valueVar);
+ }
+
+ @Override
+ public Representation getEntityData(String id) {
+ Node resource = Node.createURI(id);
+ Representation source = vf.createRepresentation(id);
+ ExtendedIterator<Triple> outgoing = indexingDataset.getDefaultGraph().find(resource, null, null);
+ boolean found = outgoing.hasNext();
+ while(outgoing.hasNext()){ //iterate over the statements for that resource
+ Triple statement = outgoing.next();
+ Node predicate = statement.getPredicate();
+ if(predicate == null || !predicate.isURI()){
+ log.warn("Ignore field {} for resource {} because it is null or not an URI!",
+ predicate,resource);
+ } else {
+ String field = predicate.getURI();
+ Node value = statement.getObject();
+ processValue(value, source, field);
+ } //end else predicate != null
+ } //end iteration over resource triple
+ if(found) {
+ return source;
+ //log.info("Resource: \n"+ModelUtils.getRepresentationInfo(source));
+ } else {
+ log.debug("No Statements found for Entity {}!",id);
+ return null;
+ }
+ }
+
+ /**
+ * Processes a {@link Node} and adds the according value to the parsed
+ * Representation.
+ * @param value The node to convert to an value for the Representation
+ * @param source the representation (MUST NOT be <code>null</code>
+ * @param field the field (MUST NOT be <code>null</code>)
+ */
+ private void processValue(Node value, Representation source, String field) {
+ if(value == null){
+ log.warn("Encountered NULL value for field {} and entity {}",
+ field,source.getId());
+ } else if(value.isURI()){ //add a reference
+ source.addReference(field, value.getURI());
+ } else if(value.isLiteral()){ //add a value or a text depending on the dataType
+ LiteralLabel ll = value.getLiteral();
+// log.debug("LL: lexical {} | value {} | dataType {} | language {}",
+// new Object[]{ll.getLexicalForm(),ll.getValue(),ll.getDatatype(),ll.language()});
+ //if the dataType == null , than we can expect a plain literal
+ RDFDatatype dataType = ll.getDatatype();
+ if(dataType != null){ //add a value
+ Object literalValue;
+ try {
+ literalValue = ll.getValue();
+ if(literalValue instanceof BaseDatatype.TypedValue){
+ //used for unknown data types
+ // -> in such cases yust use the lecial type
+ source.add(field, ((BaseDatatype.TypedValue)literalValue).lexicalValue);
+ } else if(literalValue instanceof XSDDateTime) {
+ source.add(field, ((XSDDateTime)literalValue).asCalendar().getTime()); //Entityhub uses the time
+ } else if(literalValue instanceof XSDDuration) {
+ source.add(field, literalValue.toString());
+ } else {
+ source.add(field, literalValue);
+ }
+ } catch (DatatypeFormatException e) {
+ log.warn(" Unable to convert {} to {} -> use lecicalForm",
+ ll.getLexicalForm(),ll.getDatatype());
+ literalValue = ll.getLexicalForm();
+ }
+ } else { //add a text
+ String language = ll.language();
+ if(language!=null && language.length()<1){
+ language = null;
+ }
+ source.addNaturalText(field, ll.getLexicalForm(), language);
+ }
+ // "" is parsed if there is no language
+ } else {
+ if(value.isBlank()){
+ log.info("ignoreing blank node value {} for field {} and Resource {}!",
+ new Object[]{value,field,source.getId()});
+ } else {
+ log.warn("ignoreing value {} for field {} and Resource {} because it is of an unsupported type!",
+ new Object[]{value,field,source.getId()});
+ }
+ } //end different value node type
+ }
+ /**
+ * Implementation of the iterator over the entities stored in a
+ * {@link RdfIndexingSource}. This Iterator is based on query
+ * {@link ResultSet}. It uses the low level SPARQL API because this allows
+ * to use the same code to create values for Representations
+ * @author Rupert Westenthaler
+ *
+ */
+ public final class RdfEntityIterator implements EntityDataIterator {
+ /**
+ * Variable used to
+ */
+ final Var entityVar;
+ final Var fieldVar;
+ final Var valueVar;
+ /**
+ * The result set containing all triples in the form of <code>
+ * "entity -> field -> value"</code>
+ */
+ private final ResultSet resultSet;
+ /**
+ * The {@link Node} representing the current entity or <code>null</code>
+ * if the iterator is newly created.<p>
+ * {@link Node#isURI()} is guaranteed to return <code>true</code> and
+ * {@link Node#getURI()} is guaranteed to return the id for the entity
+ */
+ private Node currentEntity = null;
+ /**
+ * The {@link Node} for the next Entity in the iteration or <code>null</code>
+ * in case there are no further or the iterator is newly created (in that
+ * case {@link #currentEntity} will be also <code>null</code>)<p>
+ * {@link Node#isURI()} is guaranteed to return <code>true</code> and
+ * {@link Node#getURI()} is guaranteed to return the id for the entity
+ */
+ private Node nextEntity = null;
+ /**
+ * The Representation of the current Element. Only available after a
+ * call to {@link #getRepresentation()}
+ */
+ private Representation currentRepresentation = null;
+ /**
+ * Holds all <code>field,value"</code> pairs of the current Entity.
+ * Elements at even positions represent<code>fields</code> and elements
+ * at uneven positions represent <code>values</code>.
+ */
+ private List<Node> data = new ArrayList<Node>();
+ /**
+ * The next (not consumed) solution of the query.
+ */
+ private Binding nextBinding = null;
+
+ protected RdfEntityIterator(ResultSet resultSet, String entityVar,String fieldVar, String valueVar){
+ if(resultSet == null){
+ throw new IllegalArgumentException("The parsed ResultSet MUST NOT be NULL!");
+ }
+ //check if the ResultSet provides the required variables to perform the query
+ List<String> vars = resultSet.getResultVars();
+ if(!vars.contains(entityVar)){
+ throw new IllegalArgumentException("The parsed ResultSet is missing the required" +
+ "Variable \""+entityVar+"\" representing the Entity!");
+ } else {
+ this.entityVar = Var.alloc(entityVar);
+ }
+ if(!vars.contains(fieldVar)){
+ throw new IllegalArgumentException("The parsed ResultSet is missing the required" +
+ "Variable \""+fieldVar+"\" representing the Field of an Entity!");
+ } else {
+ this.fieldVar = Var.alloc(fieldVar);
+ }
+ if(!vars.contains(valueVar)){
+ throw new IllegalArgumentException("The parsed ResultSet is missing the required" +
+ "Variable \""+valueVar+"\" representing the Value of a Field of an Entity!");
+ } else {
+ this.valueVar = Var.alloc(valueVar);
+ }
+ this.resultSet = resultSet;
+ //this will read until the first binding of the first Entity is found
+ initFirst();
+ }
+ private void initFirst(){
+ if(currentEntity == null && nextEntity == null){ //only for the first call
+ //consume binding until the first valid entity starts
+ while(nextEntity == null && resultSet.hasNext()){
+ Binding firstValid = resultSet.nextBinding();
+ Node entityNode = firstValid.get(entityVar);
+ if(entityNode.isURI()){ //only uri nodes are valid
+ //store it temporarily in nextBinding
+ nextBinding = firstValid;
+ //store it as next (first) entity
+ nextEntity = entityNode;
+ } else {
+ log.warn(String.format("Current Entity %s is not a URI Node -> ignored",entityNode));
+ }
+ }
+ } else {
+ throw new IllegalStateException("This Mehtod MUST be only used for Initialisation!");
+ }
+ }
+ @Override
+ public void close() {
+ data.clear();
+ data = null;
+ currentEntity = null;
+ currentRepresentation = null;
+ //Looks like it is not possible to close a resultSet
+ }
+
+ @Override
+ public Representation getRepresentation() {
+ //current Entity will be null if
+ // - next() was never called
+ // - the end of the iteration was reached
+ if(currentEntity == null){
+ return null;
+ } else if(currentRepresentation == null){
+ currentRepresentation = createRepresentation();
+ }
+ return currentRepresentation;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return resultSet.hasNext();
+ }
+
+ @Override
+ public String next() {
+ return getNext();
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException(
+ "Removal of Entities is not supported by this Implementation!");
+ }
+ /**
+ * Iterates over all {@link QuerySolution} of the {@link #resultSet}
+ * that do have {@link #currentEntity} as
+ * {@link RdfIndexingSource#VARIABLE_NAME_ENTITY VARIABLE_NAME_ENTITY}.
+ * NOTES: <ul>
+ * <li>This method also initialises the {@link #data} and sets the
+ * {@link #nextBinding} to the first solution of the next entity.<br>
+ * <li>That means also, that it would iterate over additional
+ * {@link RdfIndexingSource#VARIABLE_NAME_ENTITY VARIABLE_NAME_ENTITY}
+ * values that are not URIResources ( in cases
+ * {@link RDFNode#isURIResource()} returns <code>false</code>)
+ * <li>This method is also used to initialise the first Entity
+ * @return the URI of the current entity
+ */
+ private String getNext(){
+ //check for more elements
+ if(!resultSet.hasNext()){
+ throw new NoSuchElementException("No more Entities available");
+ }
+ //clean up data of the previous entity
+ this.data.clear(); //remove data of the previous entity
+ this.currentRepresentation = null; //and the representation
+ this.currentEntity = nextEntity; //set the nextEntity to the current
+
+ //and process the first binding already consumed from the resultSet
+ //by calling this method for the previous Entity
+ if(nextBinding != null){ //will be null for the first Entity
+ processSolution(nextBinding);
+ }
+ //now get all the other Solutions for the current entity
+ boolean next = false;
+ while(!next && resultSet.hasNext()){
+ Binding binding = resultSet.nextBinding();
+ Node entityNode = binding.get(entityVar);
+ if(entityNode.isURI()){
+ if(!entityNode.equals(currentEntity)){
+ //start of next Entity
+ this.nextEntity = entityNode; //store the node for the next entity
+ this.nextBinding = binding; //store the first binding of the next entity
+ //we are done for this entity -> exit the loop
+ next = true;
+ } else {
+ processSolution(binding);
+ }
+ } else {
+ log.warn(String.format("Current Entity %s is not a URI Node -> ignored",entityNode));
+ }
+ }
+ if(!next){ // exit the loop but still no new entity ... that means
+ nextEntity = null; //there are no more entities
+ nextBinding = null; // and there are also no more solutions
+ }
+ return currentEntity.toString();
+ }
+ /**
+ * Processes a {@link Binding} by storing the {@link Node}s for the
+ * variables {@link #fieldVar} and {@link #valueVar} to {@link #data}.
+ * This method ensures that both values are not <code>null</code> and
+ * that the {@link Node} representing the field is an URI (
+ * returns <code>true</code> for {@link Node#isURI()})
+ * @param binding the binding to process
+ */
+ private void processSolution(Binding binding) {
+ Node field = binding.get(fieldVar);
+ if(field != null && field.isURI()){
+ Node value = binding.get(valueVar);
+ if(value != null){
+ //add the pair
+ data.add(field);
+ data.add(value);
+ }
+ } else {
+ //This may only happen if the Query used to create the ResultSet
+ //containing this Solution does not link the variable
+ //VARIABLE_NAME_FIELD to properties.
+ log.error("Found Field {} for Entity {} that is not an URIResource",field,currentEntity);
+ }
+ }
+ /**
+ * Used to create the Representation the first time
+ * {@link #getRepresentation()} is called for the current entity. The
+ * information for the Representation are already stored in {@link #data}
+ */
+ private Representation createRepresentation() {
+ Representation representation = vf.createRepresentation(currentEntity.toString());
+ Iterator<Node> it = data.iterator();
+ while(it.hasNext()){
+ //data contains field,value pairs
+ //because of that we call two times next for
+ String field = it.next().getURI(); //the field
+ Node value = it.next();//and the value
+ processValue(value, representation, field);
+ }
+ return representation;
+ }
+ }
+
+}
Propchange: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSource.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfResourceImporter.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfResourceImporter.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfResourceImporter.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfResourceImporter.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,116 @@
+package org.apache.stanbol.entityhub.indexing.source.jenatdb;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
+import org.apache.commons.io.FilenameUtils;
+import org.apache.stanbol.entityhub.indexing.core.source.ResourceState;
+import org.apache.stanbol.entityhub.indexing.core.source.ResourceImporter;
+import org.openjena.riot.Lang;
+import org.openjena.riot.RiotReader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.ModelFactory;
+import com.hp.hpl.jena.tdb.TDBLoader;
+import com.hp.hpl.jena.tdb.store.DatasetGraphTDB;
+import com.hp.hpl.jena.tdb.store.bulkloader.BulkLoader;
+import com.hp.hpl.jena.tdb.store.bulkloader.Destination;
+import com.hp.hpl.jena.tdb.store.bulkloader.LoadMonitor;
+import com.hp.hpl.jena.tdb.store.bulkloader.LoaderNodeTupleTable;
+
+public class RdfResourceImporter implements ResourceImporter {
+
+ private static final Logger log = LoggerFactory.getLogger(RdfResourceImporter.class);
+ private final DatasetGraphTDB indexingDataset;
+ public RdfResourceImporter(DatasetGraphTDB indexingDataset){
+ if(indexingDataset == null){
+ throw new IllegalArgumentException("The parsed DatasetGraphTDB instance MUST NOT be NULL!");
+ }
+ this.indexingDataset = indexingDataset;
+ }
+
+ @Override
+ public ResourceState importResource(InputStream is, String resourceName) throws IOException {
+ String name = FilenameUtils.getName(resourceName);
+ if ("gz".equalsIgnoreCase(FilenameUtils.getExtension(name))) {
+ is = new GZIPInputStream(is);
+ name = FilenameUtils.removeExtension(name);
+ log.debug(" - from GZIP Archive");
+ } else if ("bz2".equalsIgnoreCase(FilenameUtils.getExtension(name))) {
+ is = new BZip2CompressorInputStream(is);
+ name = FilenameUtils.removeExtension(name);
+ log.debug(" - from BZip2 Archive");
+ }// TODO: No Zip Files inside Zip Files supported :o( ^^
+ Lang format = Lang.guess(name);
+ // For N-Triple we can use the TDBLoader
+ if (format == null) {
+ log.warn("ignore File {} because of unknown extension ");
+ return ResourceState.IGNORED;
+ } else if (format == Lang.NTRIPLES) {
+ TDBLoader.load(indexingDataset, is, true);
+ } else if (format != Lang.RDFXML) {
+ // use RIOT to parse the format but with a special configuration
+ // RiotReader!
+ TDBLoader loader = new TDBLoader();
+ loader.setShowProgress(true);
+ Destination<Triple> dest = createDestination();
+ dest.start();
+ RiotReader.parseTriples(is, format, null, dest);
+ dest.finish();
+ } else { // RDFXML
+ // in that case we need to use ARP
+ Model model = ModelFactory.createModelForGraph(indexingDataset.getDefaultGraph());
+ model.read(is, null);
+ }
+ return ResourceState.LOADED;
+ }
+ /**
+ * Creates a triple destination for the default dataset of the
+ * {@link #indexingDataset}.
+ * This code is based on how Destinations are created in the {@link BulkLoader},
+ * implementation. Note that
+ * {@link BulkLoader#loadDefaultGraph(DatasetGraphTDB, InputStream, boolean)}
+ * can not be used for formats other than {@link Lang#NTRIPLES} because it
+ * hard codes this format for loading data form the parsed InputStream.
+ * @return the destination!
+ */
+ private Destination<Triple> createDestination() {
+ LoadMonitor monitor = new LoadMonitor(indexingDataset,
+ log, "triples",50000,100000);
+ final LoaderNodeTupleTable loaderTriples = new LoaderNodeTupleTable(
+ indexingDataset.getTripleTable().getNodeTupleTable(), "triples", monitor) ;
+
+ Destination<Triple> sink = new Destination<Triple>() {
+ long count = 0 ;
+ public final void start()
+ {
+ loaderTriples.loadStart() ;
+ loaderTriples.loadDataStart() ;
+ }
+ public final void send(Triple triple)
+ {
+ loaderTriples.load(triple.getSubject(), triple.getPredicate(),
+ triple.getObject()) ;
+ count++ ;
+ }
+
+ public final void flush() { }
+ public void close() { }
+
+ public final void finish()
+ {
+ loaderTriples.loadDataFinish() ;
+ loaderTriples.loadIndexStart() ;
+ loaderTriples.loadIndexFinish() ;
+ loaderTriples.loadFinish() ;
+ }
+ } ;
+ return sink ;
+ }
+}
Propchange: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfResourceImporter.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSourceTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSourceTest.java?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSourceTest.java (added)
+++ incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSourceTest.java Fri Apr 29 09:20:31 2011
@@ -0,0 +1,139 @@
+package org.apache.stanbol.entityhub.indexing.source.jenatdb;
+
+import java.util.Iterator;
+
+import org.apache.stanbol.entityhub.indexing.core.EntityDataIterable;
+import org.apache.stanbol.entityhub.indexing.core.EntityDataIterator;
+import org.apache.stanbol.entityhub.indexing.core.EntityDataProvider;
+import org.apache.stanbol.entityhub.indexing.core.EntityIterator;
+import org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore;
+import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
+import org.apache.stanbol.entityhub.servicesapi.model.Reference;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.Text;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import static org.junit.Assert.*;
+
+public class RdfIndexingSourceTest {
+
+
+ private static final Logger log = LoggerFactory.getLogger(RdfIndexingSourceTest.class);
+ /**
+ * mvn copies the resources in "src/test/resources" to target/test-classes
+ */
+ private static final String TEST_CONFIGS_ROOT = "/target/test-classes/testConfigs/";
+
+ private static final String TEXT_TEST_FIELD = "http://www.geonames.org/ontology#alternateName";
+ private static final String VALUE_TEST_FIELD = "http://www.w3.org/2003/01/geo/wgs84_pos#lat";
+ private static final String REFERENCE_TEST_FIELD = "http://www.w3.org/2002/07/owl#sameAs";
+
+ private static final long NUMBER_OF_ENTITIES_EXPECTED = 3;
+
+ /**
+ * The path to the folder used as root for the tests
+ */
+ private static String testRoot;
+ @BeforeClass
+ public static void init(){
+ //initialise based on basedir or user.dir
+ String baseDir = System.getProperty("basedir");
+ if(baseDir == null){
+ baseDir = System.getProperty("user.dir");
+ }
+ testRoot = baseDir+TEST_CONFIGS_ROOT;
+ log.info("Test Root ="+testRoot);
+ }
+ @Test
+ public void testEntityDataIterable(){
+ IndexingConfig config = new IndexingConfig(testRoot+"iterable");
+ EntityDataIterable iterable = config.getDataInterable();
+ assertNotNull(iterable);
+ assertEquals(iterable.getClass(), RdfIndexingSource.class);
+ assertTrue(iterable.needsInitialisation());
+ iterable.initialise();
+ EntityDataIterator it = iterable.entityDataIterator();
+ long count = 0;
+ while(it.hasNext()){
+ String entity = it.next();
+ log.info("validate Entity "+entity);
+ assertNotNull(entity);
+ validateRepresentation(it.getRepresentation(), entity);
+ count++;
+ }
+ //check if all entities where found
+ assertEquals(String.format("%s Entities expected but %s processed!",
+ NUMBER_OF_ENTITIES_EXPECTED,count),
+ NUMBER_OF_ENTITIES_EXPECTED, count);
+ }
+ @Test
+ public void testEntityDataProvider(){
+ IndexingConfig config = new IndexingConfig(testRoot+"provider");
+ EntityIterator entityIdIterator = config.getEntityIdIterator();
+ assertNotNull("Unable to perform test whithout EntityIterator",entityIdIterator);
+ EntityDataProvider dataProvider = config.getEntityDataProvider();
+ assertNotNull(dataProvider);
+ assertTrue(dataProvider.needsInitialisation());//there are test data to load
+ dataProvider.initialise();
+ assertEquals(dataProvider.getClass(), RdfIndexingSource.class);
+ long count = 0;
+ while(entityIdIterator.hasNext()){
+ EntityScore entityScore = entityIdIterator.next();
+ assertNotNull(entityScore);
+ assertNotNull(entityScore.id);
+ validateRepresentation(dataProvider.getEntityData(entityScore.id),
+ entityScore.id);
+ count++;
+ }
+ //check if all entities where found
+ assertEquals(String.format("%s Entities expected but %s processed!",
+ NUMBER_OF_ENTITIES_EXPECTED,count),
+ NUMBER_OF_ENTITIES_EXPECTED, count);
+ }
+
+ /**
+ * @param it
+ * @param entity
+ */
+ private void validateRepresentation(Representation rep, String id) {
+ assertNotNull("Representation for Entity with ID "+id+" is null",rep);
+ assertEquals(id, rep.getId());
+ //check if multiple languages are parsed correctly
+ testText(rep);
+ //TODO: need to add XSD dataTypes to the test data
+ //testValue(rep, Double.class);
+ testReference(rep);
+ }
+ private void testText(Representation rep){
+ Iterator<Text> values = rep.getText(TEXT_TEST_FIELD);
+ assertTrue(values.hasNext());
+ while(values.hasNext()){
+ Text text = values.next();
+ assertNotNull(text);
+ String lang = text.getLanguage();
+ //log.info(text.getText()+" | "+text.getLanguage()+" | "+text.getText().endsWith("@"+lang));
+ //this texts that the text does not contain the @{lang} as added by
+ //the toString method of the RDF Literal java class
+ assertFalse("Labels MUST NOT end with the Language! value="+text.getText(),
+ text.getText().endsWith("@"+lang));
+ }
+ }
+ private <T> void testValue(Representation rep, Class<T> type){
+ Iterator<T> values = rep.get(VALUE_TEST_FIELD,type);
+ assertTrue(values.hasNext());
+ while(values.hasNext()){
+ T value = values.next();
+ assertNotNull(value);
+ }
+ }
+ private void testReference(Representation rep){
+ Iterator<Reference> values = rep.getReferences(REFERENCE_TEST_FIELD);
+ assertTrue(values.hasNext());
+ while(values.hasNext()){
+ Reference ref = values.next();
+ assertNotNull(ref);
+ }
+ }
+}
Propchange: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSourceTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexFieldConfig.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexFieldConfig.txt?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexFieldConfig.txt (added)
+++ incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexFieldConfig.txt Fri Apr 29 09:20:31 2011
@@ -0,0 +1 @@
+#This is the default config that would index everything
Propchange: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexFieldConfig.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexing.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexing.properties?rev=1097740&view=auto
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexing.properties (added)
+++ incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexing.properties Fri Apr 29 09:20:31 2011
@@ -0,0 +1,9 @@
+name=simple
+description=Simple Configuration
+
+# use the RDF indexing source as EntityDataProvider
+entityDataProvider=org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource,source:testData
+
+# used in the tests to provide the IDs of the Entities in the test data
+entityIdIterator=org.apache.stanbol.entityhub.indexing.core.source.LineBasedEntityIterator,source:testEntityIds.txt,charset:UTF-8,encodeIds:false
+
Propchange: incubator/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/existing/indexing/config/indexing.properties
------------------------------------------------------------------------------
svn:mime-type = text/plain