You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by su...@apache.org on 2011/12/07 16:41:45 UTC
svn commit: r1211477 [2/4] - in /incubator/stanbol/trunk:
cmsadapter/cmis/src/main/java/org/apache/stanbol/cmsadapter/cmis/mapping/
cmsadapter/jcr/src/main/java/org/apache/stanbol/cmsadapter/jcr/mapping/
contenthub/ contenthub/api/src/main/java/org/apa...
Modified: incubator/stanbol/trunk/contenthub/core/src/main/java/org/apache/stanbol/contenthub/core/utils/ExploreQueryHelper.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/core/src/main/java/org/apache/stanbol/contenthub/core/utils/ExploreQueryHelper.java?rev=1211477&r1=1211476&r2=1211477&view=diff
==============================================================================
--- incubator/stanbol/trunk/contenthub/core/src/main/java/org/apache/stanbol/contenthub/core/utils/ExploreQueryHelper.java (original)
+++ incubator/stanbol/trunk/contenthub/core/src/main/java/org/apache/stanbol/contenthub/core/utils/ExploreQueryHelper.java Wed Dec 7 15:41:42 2011
@@ -1,27 +1,24 @@
/*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.stanbol.contenthub.core.utils;
-import java.util.List;
-
/**
- * Includes static methods that returns SPARQL query strings Queries are
- * executed on graph of entities to find their types and extract semantic
- * information according to entity type's
+ * Includes static methods that return SPARQL query strings. The queries are executed on the graph of
+ * entities to find their types and to extract semantic information according to each entity's types.
*
* @author srdc
*
@@ -29,123 +26,156 @@ import java.util.List;
public class ExploreQueryHelper {
- public final static String[] placeTypedProperties = { "country",
- "largestCity", "city", "state", "capital", "isPartOf", "part",
- "deathPlace", "birthPlace", "location" };
-
- public final static String[] personTypedProperties = { "leader",
- "leaderName", "child", "spouse", "partner", "president" };
-
- public final static String[] organizationTypedProperties = { "leaderParty",
- "affiliation", "team", "party", "otherParty", "associatedBand" };
-
- /**
- * Used to find all rdf:type's of the entity
- *
- * @return is SPARQL query finds rdf:type's of an entity
- */
- public final static String entityTypeExtracterQuery() {
- String query = "PREFIX j.3:<http://www.iks-project.eu/ontology/rick/model/>\n"
- + "PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"
- + "SELECT DISTINCT ?type\n"
- + "WHERE {\n"
- + "?entity j.3:about ?description.\n"
- + "?description rdf:type ?type\n" + "}\n";
- return query;
- }
-
- /**
- * Creates a query which finds place type entities; <br> country <br> capital <br>
- * largestCity <br> isPartOf <br> part <br> birthPlace <br> deathPlace <br> location <br> ...
- * optionally
- *
- * @return resulted query
- */
- public final static String relatedPlaceQuery() {
- StringBuilder query = new StringBuilder(
- "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n");
- query.append("PREFIX dbp.ont: <http://dbpedia.org/ontology/>\n");
- query.append("PREFIX about.ns: <http://www.iks-project.eu/ontology/rick/model/>\n");
- query.append("SELECT DISTINCT ");
-
- for (int i = 0; i < placeTypedProperties.length; i++) {
- query.append(" ?" + placeTypedProperties[i]);
- }
- query.append(" \n"
- + "WHERE {\n ?entity about.ns:about ?description .\n");
-
- for (int i = 0; i < placeTypedProperties.length; i++) {
- String var = placeTypedProperties[i];
- query.append("OPTIONAL { ?description dbp.ont:" + var + " ?" + var
- + " }\n");
- }
-
- query.append("}\n");
- return query.toString();
- }
-
- /**
- * creates a query that finds the person typed entities;
- * <br> president
- * <br> spouse
- * <br> leader
- * <br> ... optionally
- * @return resulted query string
- */
- public final static String relatedPersonQuery() {
- StringBuilder query = new StringBuilder(
- "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n");
- query.append("PREFIX dbp.ont: <http://dbpedia.org/ontology/>\n");
- query.append("PREFIX about.ns: <http://www.iks-project.eu/ontology/rick/model/>\n");
- query.append("SELECT DISTINCT ");
-
- for (int i = 0; i < personTypedProperties.length; i++) {
- query.append(" ?" + personTypedProperties[i]);
- }
- query.append(" \n"
- + "WHERE {\n ?entity about.ns:about ?description .\n");
-
- for (int i = 0; i < personTypedProperties.length; i++) {
- String var = personTypedProperties[i];
- query.append("OPTIONAL { ?description dbp.ont:" + var + " ?" + var
- + " }\n");
- }
-
- query.append("}\n");
- return query.toString();
-
- }
-
- /**
- * creates a query that finds organization typed related entities;
- * <br> associatedBand
- * <br> team
- * <br> party
- * <br> ... optionally
- * @return resulted query String
- */
- public final static String relatedOrganizationQuery() {
- StringBuilder query = new StringBuilder(
- "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n");
- query.append("PREFIX dbp.ont: <http://dbpedia.org/ontology/>\n");
- query.append("PREFIX about.ns: <http://www.iks-project.eu/ontology/rick/model/>\n");
- query.append("SELECT DISTINCT ");
-
- for (int i = 0; i < personTypedProperties.length; i++) {
- query.append(" ?" + personTypedProperties[i]);
- }
- query.append(" \n"
- + "WHERE {\n ?entity about.ns:about ?description .\n");
-
- for (int i = 0; i < personTypedProperties.length; i++) {
- String var = personTypedProperties[i];
- query.append("OPTIONAL { ?description dbp.ont:" + var + " ?" + var
- + " }\n");
- }
-
- query.append("}\n");
- return query.toString();
-
- }
+ /**
+ * dbpedia-owl:place ranged properties for related places. Each entry is used by relatedPlaceQuery() as
+ * the local name of a dbp.ont: predicate and as the name of the SPARQL variable bound to its value.
+ * Note that only the array reference is final; the elements themselves can still be modified by callers.
+ */
+ public final static String[] placeTypedProperties = {"country", "largestCity", "city", "state",
+ "capital", "isPartOf", "part", "deathPlace",
+ "birthPlace", "location"};
+
+ /**
+ * dbpedia-owl:person ranged properties for related persons. Each entry is used by relatedPersonQuery()
+ * as the local name of a dbp.ont: predicate and as the name of the SPARQL variable bound to its value.
+ */
+ public final static String[] personTypedProperties = {"leader", "leaderName", "child", "spouse",
+ "partner", "president"};
+
+ /**
+ * dbpedia-owl:organization ranged properties for related organizations. Each entry is used by
+ * relatedOrganizationQuery() as the local name of a dbp.ont: predicate and as the name of the SPARQL
+ * variable bound to its value.
+ */
+ public final static String[] organizationTypedProperties = {"leaderParty", "affiliation", "team",
+ "party", "otherParty", "associatedBand"};
+
+    /**
+     * Builds the SPARQL query that lists every distinct rdf:type of the description that is attached to
+     * an entity through the rick model "about" property.
+     *
+     * @return the SELECT query as a string; its only projected variable is ?type
+     */
+    public final static String entityTypeExtracterQuery() {
+        StringBuilder sparql = new StringBuilder();
+        sparql.append("PREFIX j.3:<http://www.iks-project.eu/ontology/rick/model/>\n");
+        sparql.append("PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n");
+        sparql.append("SELECT DISTINCT ?type\n");
+        sparql.append("WHERE {\n");
+        sparql.append("?entity j.3:about ?description.\n");
+        sparql.append("?description rdf:type ?type\n");
+        sparql.append("}\n");
+        return sparql.toString();
+    }
+
+    /**
+     * Builds a SPARQL query that retrieves the place valued properties of an entity's description. Every
+     * name in placeTypedProperties (country, capital, largestCity, isPartOf, part, birthPlace, deathPlace,
+     * location, ...) becomes a projected variable that is bound inside its own OPTIONAL pattern, so each
+     * of the properties is matched optionally.
+     *
+     * @return the resulting query string
+     */
+    public final static String relatedPlaceQuery() {
+        // Collect the projection list and the OPTIONAL patterns side by side, then assemble the query.
+        StringBuilder projection = new StringBuilder();
+        StringBuilder optionals = new StringBuilder();
+        for (String property : placeTypedProperties) {
+            projection.append(" ?").append(property);
+            optionals.append("OPTIONAL { ?description dbp.ont:").append(property).append(" ?")
+                    .append(property).append(" }\n");
+        }
+
+        StringBuilder sparql = new StringBuilder();
+        sparql.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n");
+        sparql.append("PREFIX dbp.ont: <http://dbpedia.org/ontology/>\n");
+        sparql.append("PREFIX about.ns: <http://www.iks-project.eu/ontology/rick/model/>\n");
+        sparql.append("SELECT DISTINCT ");
+        sparql.append(projection);
+        sparql.append(" \n").append("WHERE {\n ?entity about.ns:about ?description .\n");
+        sparql.append(optionals);
+        sparql.append("}\n");
+        return sparql.toString();
+    }
+
+    /**
+     * Builds a SPARQL query that retrieves the person valued properties of an entity's description. Every
+     * name in personTypedProperties (president, spouse, leader, ...) becomes a projected variable that is
+     * bound inside its own OPTIONAL pattern, so each of the properties is matched optionally.
+     *
+     * @return the resulting query string
+     */
+    public final static String relatedPersonQuery() {
+        // Collect the projection list and the OPTIONAL patterns side by side, then assemble the query.
+        StringBuilder projection = new StringBuilder();
+        StringBuilder optionals = new StringBuilder();
+        for (String property : personTypedProperties) {
+            projection.append(" ?").append(property);
+            optionals.append("OPTIONAL { ?description dbp.ont:").append(property).append(" ?")
+                    .append(property).append(" }\n");
+        }
+
+        StringBuilder sparql = new StringBuilder();
+        sparql.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n");
+        sparql.append("PREFIX dbp.ont: <http://dbpedia.org/ontology/>\n");
+        sparql.append("PREFIX about.ns: <http://www.iks-project.eu/ontology/rick/model/>\n");
+        sparql.append("SELECT DISTINCT ");
+        sparql.append(projection);
+        sparql.append(" \n").append("WHERE {\n ?entity about.ns:about ?description .\n");
+        sparql.append(optionals);
+        sparql.append("}\n");
+        return sparql.toString();
+    }
+
+    /**
+     * Builds a SPARQL query that retrieves the organization valued properties of an entity's description.
+     * Every name in organizationTypedProperties (associatedBand, team, party, ...) becomes a projected
+     * variable that is bound inside its own OPTIONAL pattern, so each of the properties is matched
+     * optionally.
+     *
+     * @return the resulting query string
+     */
+    public final static String relatedOrganizationQuery() {
+        // Collect the projection list and the OPTIONAL patterns side by side, then assemble the query.
+        StringBuilder projection = new StringBuilder();
+        StringBuilder optionals = new StringBuilder();
+        for (String property : organizationTypedProperties) {
+            projection.append(" ?").append(property);
+            optionals.append("OPTIONAL { ?description dbp.ont:").append(property).append(" ?")
+                    .append(property).append(" }\n");
+        }
+
+        StringBuilder sparql = new StringBuilder();
+        sparql.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n");
+        sparql.append("PREFIX dbp.ont: <http://dbpedia.org/ontology/>\n");
+        sparql.append("PREFIX about.ns: <http://www.iks-project.eu/ontology/rick/model/>\n");
+        sparql.append("SELECT DISTINCT ");
+        sparql.append(projection);
+        sparql.append(" \n").append("WHERE {\n ?entity about.ns:about ?description .\n");
+        sparql.append(optionals);
+        sparql.append("}\n");
+        return sparql.toString();
+    }
+
+    /**
+     * Finds the index at which the local name of a URI starts, i.e. the position right after the
+     * namespace separator. The separators are tried in this order: <br>
+     * - last occurrence of # <br>
+     * - last occurrence of / <br>
+     * - last occurrence of : <br>
+     * If none of them is present, the length of the string is returned.
+     *
+     * @param URI
+     *            the URI whose namespace is to be split off; may be null
+     * @return the index of the first character after the last separator found, the length of the string
+     *         if it contains no separator, or -1 if URI is null
+     */
+    public static int splitNameSpaceFromURI(String URI) {
+        // The contract promises -1 for a null URI; without this guard the first lastIndexOf call would
+        // throw a NullPointerException instead.
+        if (URI == null) {
+            return -1;
+        }
+
+        // The order matters: '#' wins over '/', which wins over ':'.
+        for (char separator : new char[] {'#', '/', ':'}) {
+            int index = URI.lastIndexOf(separator);
+            if (index != -1) {
+                return index + 1;
+            }
+        }
+        return URI.length();
+    }
}
Added: incubator/stanbol/trunk/contenthub/core/src/main/java/org/apache/stanbol/contenthub/core/utils/IndexingUtil.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/core/src/main/java/org/apache/stanbol/contenthub/core/utils/IndexingUtil.java?rev=1211477&view=auto
==============================================================================
--- incubator/stanbol/trunk/contenthub/core/src/main/java/org/apache/stanbol/contenthub/core/utils/IndexingUtil.java (added)
+++ incubator/stanbol/trunk/contenthub/core/src/main/java/org/apache/stanbol/contenthub/core/utils/IndexingUtil.java Wed Dec 7 15:41:42 2011
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.contenthub.core.utils;
+
+import java.util.List;
+
+import org.apache.stanbol.cmsadapter.servicesapi.helper.CMSAdapterVocabulary;
+import org.apache.stanbol.contenthub.servicesapi.search.vocabulary.SearchVocabulary;
+
+import com.hp.hpl.jena.ontology.OntClass;
+import com.hp.hpl.jena.ontology.OntModel;
+import com.hp.hpl.jena.ontology.OntResource;
+import com.hp.hpl.jena.rdf.model.Property;
+import com.hp.hpl.jena.rdf.model.Resource;
+import com.hp.hpl.jena.rdf.model.ResourceFactory;
+import com.hp.hpl.jena.rdf.model.Statement;
+import com.hp.hpl.jena.vocabulary.RDF;
+
+/**
+ * Prepares an external ontology model for the LARQ index used by the search operation by attaching
+ * SearchVocabulary.HAS_LOCAL_NAME values to the resources of the model.
+ */
+public class IndexingUtil {
+
+    /**
+     * Adds a SearchVocabulary.HAS_LOCAL_NAME value to every non-anonymous class and individual of the
+     * model (their local name) and to every subject typed as CMSAdapterVocabulary.CMS_OBJECT that has a
+     * non-empty CMS object name (that name).
+     *
+     * @param model
+     *            the ontology model to enrich; it is modified in place
+     */
+    public static void addIndexPropertyToOntResources(OntModel model) {
+        // Class names. The listings are materialized with toList() before the loops start, presumably
+        // because the loop bodies add statements to the very model being listed.
+        for (OntClass ontClass : model.listClasses().toList()) {
+            addLocalName(ontClass);
+        }
+        // Individual names
+        for (OntResource individual : model.listIndividuals().toList()) {
+            addLocalName(individual);
+        }
+
+        // CMS objects: the index is built on HAS_LOCAL_NAME, so the CMS object name has to be copied
+        // into that property as well.
+        Resource cmsObjectType = ResourceFactory.createResource(CMSAdapterVocabulary.CMS_OBJECT
+                .getUnicodeString());
+        List<Statement> typeStatements = model.listStatements(null, RDF.type, cmsObjectType).toList();
+        for (Statement typeStatement : typeStatements) {
+            Resource cmsObject = typeStatement.getSubject();
+            String name = getCMSObjectName(cmsObject);
+            if (name.equals("")) {
+                continue;
+            }
+            model.add(ResourceFactory.createStatement(cmsObject, SearchVocabulary.HAS_LOCAL_NAME,
+                ResourceFactory.createPlainLiteral(name)));
+        }
+    }
+
+    /**
+     * Reads the CMS object name of a resource.
+     *
+     * @param subject
+     *            the resource to inspect
+     * @return the string value of the CMSAdapterVocabulary.CMS_OBJECT_NAME property of the subject, or
+     *         the empty string if the subject does not have that property
+     */
+    public static String getCMSObjectName(Resource subject) {
+        Property cmsNameProp = ResourceFactory.createProperty(CMSAdapterVocabulary.CMS_OBJECT_NAME
+                .getUnicodeString());
+        if (!subject.hasProperty(cmsNameProp)) {
+            return "";
+        }
+        return subject.getProperty(cmsNameProp).getString();
+    }
+
+    /**
+     * Attaches the local name of a named ontology resource as its HAS_LOCAL_NAME value; null and
+     * anonymous resources are skipped.
+     */
+    private static void addLocalName(OntResource resource) {
+        if (resource != null && !resource.isAnon()) {
+            resource.addProperty(SearchVocabulary.HAS_LOCAL_NAME, resource.getLocalName());
+        }
+    }
+}
Modified: incubator/stanbol/trunk/contenthub/core/src/main/java/org/apache/stanbol/contenthub/core/utils/JSONUtils.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/core/src/main/java/org/apache/stanbol/contenthub/core/utils/JSONUtils.java?rev=1211477&r1=1211476&r2=1211477&view=diff
==============================================================================
--- incubator/stanbol/trunk/contenthub/core/src/main/java/org/apache/stanbol/contenthub/core/utils/JSONUtils.java (original)
+++ incubator/stanbol/trunk/contenthub/core/src/main/java/org/apache/stanbol/contenthub/core/utils/JSONUtils.java Wed Dec 7 15:41:42 2011
@@ -120,7 +120,8 @@ public class JSONUtils {
JSONObject jObj = new JSONObject(sci.getConstraints());
try {
jObj.put("content", content);
- jObj.put("id", sci.getId());
+ jObj.put("mimeType", sci.getMimeType());
+ jObj.put("id", ContentItemIDOrganizer.detachBaseURI(sci.getId()));
} catch (JSONException e) {
logger.error("Cannot create the JSON Object.", e);
}
Added: incubator/stanbol/trunk/contenthub/core/src/main/java/org/apache/stanbol/contenthub/core/utils/NearByFinder.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/core/src/main/java/org/apache/stanbol/contenthub/core/utils/NearByFinder.java?rev=1211477&view=auto
==============================================================================
--- incubator/stanbol/trunk/contenthub/core/src/main/java/org/apache/stanbol/contenthub/core/utils/NearByFinder.java (added)
+++ incubator/stanbol/trunk/contenthub/core/src/main/java/org/apache/stanbol/contenthub/core/utils/NearByFinder.java Wed Dec 7 15:41:42 2011
@@ -0,0 +1,92 @@
+package org.apache.stanbol.contenthub.core.utils;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.hp.hpl.jena.ontology.OntModel;
+import com.hp.hpl.jena.query.QueryExecution;
+import com.hp.hpl.jena.query.QueryExecutionFactory;
+import com.hp.hpl.jena.query.QuerySolution;
+import com.hp.hpl.jena.query.ResultSet;
+import com.hp.hpl.jena.rdf.model.RDFNode;
+
+/**
+ * Collects the values that locate a dbpedia/place typed entity: the dbpedia/region and dbpedia/isPartOf
+ * properties of the entity's description are read from the given model and kept as the "nearby
+ * properties" of the entity.
+ * <p>
+ * NOTE(review): the collected values are presumably meant to be used afterwards to look up other
+ * entities that share the same region / isPartOf value; this class itself only gathers them and does
+ * not expose an accessor for them in this revision.
+ *
+ * @author srdc
+ *
+ */
+public class NearByFinder {
+
+    private static final Logger logger = LoggerFactory.getLogger(NearByFinder.class);
+
+    /** Prefix declarations shared by the ASK and the SELECT query. */
+    private static final String PREFIXES = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"
+            + "PREFIX dbp.ont: <http://dbpedia.org/ontology/>\n"
+            + "PREFIX about.ns: <http://www.iks-project.eu/ontology/rick/model/>\n";
+
+    private List<String> nearByProperties;
+
+    /**
+     * Checks whether the model contains a description that has a dbpedia/region or a dbpedia/isPartOf
+     * value.
+     *
+     * @param resultModel
+     *            the model holding the entity; may be null
+     * @return true if at least one region or isPartOf value exists, false otherwise or if the model is
+     *         null
+     */
+    public boolean canFindNearby(OntModel resultModel) {
+        if (resultModel == null) {
+            logger.info("Entity Model is null, no nearby places can be found");
+            return false;
+        }
+
+        // The two properties are alternatives of a UNION rather than OPTIONAL patterns: an ASK whose
+        // only additional patterns are OPTIONAL is true as soon as the "about" triple exists, whether
+        // or not a region or isPartOf value is present.
+        StringBuilder qb = new StringBuilder(PREFIXES);
+        qb.append("ASK {\n");
+        qb.append("?entity about.ns:about ?description.\n");
+        qb.append("{ ?description dbp.ont:region ?region }\n");
+        qb.append("UNION\n");
+        qb.append("{ ?description dbp.ont:isPartOf ?isPartOf }\n");
+        qb.append("}");
+
+        QueryExecution execution = QueryExecutionFactory.create(qb.toString(), resultModel);
+        try {
+            boolean result = execution.execAsk();
+            logger.info("Ask query executed successfully and nearby places can be found is {}", result);
+            return result;
+        } finally {
+            // Release the resources held by the query execution.
+            execution.close();
+        }
+    }
+
+    /**
+     * Reads the region and isPartOf values of the entity described in the given model. When
+     * {@link #canFindNearby(OntModel)} reports that there are none, a warning is logged and the list of
+     * nearby properties stays empty.
+     *
+     * @param resultModel
+     *            the model holding the entity; may be null
+     */
+    public NearByFinder(OntModel resultModel) {
+        nearByProperties = new ArrayList<String>();
+
+        if (!canFindNearby(resultModel)) {
+            // Only warn when nothing can be found; previously this was logged on every construction.
+            logger.warn("Nearby places of entity CAN NOT be FOUND");
+            return;
+        }
+
+        StringBuilder qb = new StringBuilder(PREFIXES);
+        qb.append("SELECT DISTINCT ?isPartOf ?region WHERE {\n");
+        qb.append("?entity about.ns:about ?description.\n");
+        qb.append("OPTIONAL { ?description dbp.ont:isPartOf ?isPartOf}\n");
+        qb.append("OPTIONAL { ?description dbp.ont:region ?region}\n");
+        qb.append("}");
+
+        QueryExecution execution = QueryExecutionFactory.create(qb.toString(), resultModel);
+        try {
+            ResultSet resultSet = execution.execSelect();
+            while (resultSet.hasNext()) {
+                QuerySolution sol = resultSet.next();
+                RDFNode region = sol.get("region");
+                RDFNode isPartOf = sol.get("isPartOf");
+
+                // Either variable may be unbound because both patterns are OPTIONAL.
+                if (region != null) {
+                    nearByProperties.add(region.toString());
+                }
+                if (isPartOf != null) {
+                    nearByProperties.add(isPartOf.toString());
+                }
+            }
+        } finally {
+            // Release the resources held by the query execution.
+            execution.close();
+        }
+    }
+
+}
Modified: incubator/stanbol/trunk/contenthub/core/src/main/resources/solr/core/contenthub.solrindex.zip
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/core/src/main/resources/solr/core/contenthub.solrindex.zip?rev=1211477&r1=1211476&r2=1211477&view=diff
==============================================================================
Binary files - no diff available.
Modified: incubator/stanbol/trunk/contenthub/helper/cnn-importer/src/main/java/org/apache/stanbol/contenthub/helper/cnn/CNNImporter.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/helper/cnn-importer/src/main/java/org/apache/stanbol/contenthub/helper/cnn/CNNImporter.java?rev=1211477&r1=1211476&r2=1211477&view=diff
==============================================================================
--- incubator/stanbol/trunk/contenthub/helper/cnn-importer/src/main/java/org/apache/stanbol/contenthub/helper/cnn/CNNImporter.java (original)
+++ incubator/stanbol/trunk/contenthub/helper/cnn-importer/src/main/java/org/apache/stanbol/contenthub/helper/cnn/CNNImporter.java Wed Dec 7 15:41:42 2011
@@ -18,7 +18,7 @@
package org.apache.stanbol.contenthub.helper.cnn;
import java.net.URI;
-import java.util.List;
+import java.util.Map;
/**
*
@@ -27,6 +27,6 @@ import java.util.List;
*/
public interface CNNImporter {
- List<URI> importCNNNews(String topic, int maxNumber, boolean fullNews);
+ Map<URI, String> importCNNNews(String topic, int maxNumber, boolean fullNews);
}
Modified: incubator/stanbol/trunk/contenthub/helper/cnn-importer/src/main/java/org/apache/stanbol/contenthub/helper/cnn/impl/CNNImporterImpl.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/helper/cnn-importer/src/main/java/org/apache/stanbol/contenthub/helper/cnn/impl/CNNImporterImpl.java?rev=1211477&r1=1211476&r2=1211477&view=diff
==============================================================================
--- incubator/stanbol/trunk/contenthub/helper/cnn-importer/src/main/java/org/apache/stanbol/contenthub/helper/cnn/impl/CNNImporterImpl.java (original)
+++ incubator/stanbol/trunk/contenthub/helper/cnn-importer/src/main/java/org/apache/stanbol/contenthub/helper/cnn/impl/CNNImporterImpl.java Wed Dec 7 15:41:42 2011
@@ -23,7 +23,9 @@ import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
@@ -69,9 +71,9 @@ public class CNNImporterImpl implements
}
@Override
- public List<URI> importCNNNews(String topic, int maxNumber, boolean fullNews) {
+ public Map<URI,String> importCNNNews(String topic, int maxNumber, boolean fullNews) {
List<NewsSummary> summaries = getRelatedNews(topic, maxNumber);
- List<URI> uris = new ArrayList<URI>();
+ Map<URI,String> newsInfo = new HashMap<URI, String>();
if (fullNews) {
for (NewsSummary summary : summaries) {
String realContent = getNewsContent(summary.getNewsURI());
@@ -83,18 +85,19 @@ public class CNNImporterImpl implements
for (NewsSummary summary : summaries) {
try {
- SolrContentItem sci = solrStore.create(null, summary.getContent().getBytes(), "text/plain",
- null);
+ SolrContentItem sci = solrStore.create(null, summary.getTitle(), summary.getContent()
+ .getBytes(), "text/plain", summary.getTitleConstraint());
URI uri = new URI(solrStore.enhanceAndPut(sci));
+ String title = summary.getTitle();
if (uri != null) {
- uris.add(uri);
+ newsInfo.put(uri, title);
}
} catch (Exception e) {
logger.error("", e);
logger.warn("Error storing content {}. Skipping ...", summary.getContent());
}
}
- return uris;
+ return newsInfo;
}
private String getNewsContent(URI newsURI) {
@@ -161,8 +164,10 @@ public class CNNImporterImpl implements
try {
String summary = current.getElementsByTagName("p").item(0).getFirstChild().getNodeValue();
String uri = ((Element) current.getElementsByTagName("a").item(0)).getAttribute("href");
+ String title = current.getElementsByTagName("a").item(0).getFirstChild().getNodeValue();
newsSummary = new NewsSummary();
newsSummary.setNewsURI(new URI(uri));
+ newsSummary.setTitle(title);
newsSummary.setContent(summary);
} catch (Exception e) {
newsSummary = null;
Modified: incubator/stanbol/trunk/contenthub/helper/cnn-importer/src/main/java/org/apache/stanbol/contenthub/helper/cnn/impl/NewsSummary.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/helper/cnn-importer/src/main/java/org/apache/stanbol/contenthub/helper/cnn/impl/NewsSummary.java?rev=1211477&r1=1211476&r2=1211477&view=diff
==============================================================================
--- incubator/stanbol/trunk/contenthub/helper/cnn-importer/src/main/java/org/apache/stanbol/contenthub/helper/cnn/impl/NewsSummary.java (original)
+++ incubator/stanbol/trunk/contenthub/helper/cnn-importer/src/main/java/org/apache/stanbol/contenthub/helper/cnn/impl/NewsSummary.java Wed Dec 7 15:41:42 2011
@@ -18,6 +18,12 @@
package org.apache.stanbol.contenthub.helper.cnn.impl;
import java.net.URI;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.stanbol.contenthub.servicesapi.store.vocabulary.SolrVocabulary;
/**
*
@@ -27,6 +33,7 @@ import java.net.URI;
public class NewsSummary {
private URI newsURI;
+ private String title;
private String content;
public void setNewsURI(URI newsURI) {
@@ -45,4 +52,21 @@ public class NewsSummary {
return content;
}
+ /**
+ * @return the title of the news item, or null if no title has been set
+ */
+ public String getTitle() {
+ return title;
+ }
+
+ /**
+ * @param title
+ * the title of the news item; it is also the value reported by getTitleConstraint()
+ */
+ public void setTitle(String title) {
+ this.title = title;
+ }
+
+    /**
+     * Wraps the title into a constraint map keyed by the Solr title field name.
+     *
+     * @return a map with a single entry whose key is SolrVocabulary.SolrFieldName.TITLE and whose value
+     *         is a one-element list holding the title, or null when the title is null or empty
+     */
+    public Map<String, List<Object>> getTitleConstraint() {
+        boolean hasTitle = title != null && !title.isEmpty();
+        if (!hasTitle) {
+            return null;
+        }
+
+        List<Object> values = new ArrayList<Object>(1);
+        values.add(title);
+
+        Map<String, List<Object>> constraint = new HashMap<String,List<Object>>();
+        constraint.put(SolrVocabulary.SolrFieldName.TITLE.toString(), values);
+        return constraint;
+    }
+
}
Modified: incubator/stanbol/trunk/contenthub/parent/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/parent/pom.xml?rev=1211477&r1=1211476&r2=1211477&view=diff
==============================================================================
--- incubator/stanbol/trunk/contenthub/parent/pom.xml (original)
+++ incubator/stanbol/trunk/contenthub/parent/pom.xml Wed Dec 7 15:41:42 2011
@@ -210,4 +210,11 @@
</dependencies>
</dependencyManagement>
+ <repositories>
+ <repository>
+ <id>at.newmedialab</id>
+ <name>KiWi Project Snapshot Repository</name>
+ <url>http://devel.kiwi-project.eu:8080/nexus/content/repositories/snapshots/</url>
+ </repository>
+ </repositories>
</project>
Modified: incubator/stanbol/trunk/contenthub/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/pom.xml?rev=1211477&r1=1211476&r2=1211477&view=diff
==============================================================================
--- incubator/stanbol/trunk/contenthub/pom.xml (original)
+++ incubator/stanbol/trunk/contenthub/pom.xml Wed Dec 7 15:41:42 2011
@@ -54,6 +54,7 @@
<module>search</module>
<module>helper/cnn-importer</module>
<module>web</module>
+ <!--<module>ldpath</module>-->
<module>bundlelist</module>
</modules>
</project>
Modified: incubator/stanbol/trunk/contenthub/search/engines/ontologyresource/src/main/java/org/apache/stanbol/contenthub/search/engines/ontologyresource/OntologyResourceSearchEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/search/engines/ontologyresource/src/main/java/org/apache/stanbol/contenthub/search/engines/ontologyresource/OntologyResourceSearchEngine.java?rev=1211477&r1=1211476&r2=1211477&view=diff
==============================================================================
--- incubator/stanbol/trunk/contenthub/search/engines/ontologyresource/src/main/java/org/apache/stanbol/contenthub/search/engines/ontologyresource/OntologyResourceSearchEngine.java (original)
+++ incubator/stanbol/trunk/contenthub/search/engines/ontologyresource/src/main/java/org/apache/stanbol/contenthub/search/engines/ontologyresource/OntologyResourceSearchEngine.java Wed Dec 7 15:41:42 2011
@@ -18,6 +18,7 @@
package org.apache.stanbol.contenthub.search.engines.ontologyresource;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import org.apache.felix.scr.annotations.Component;
@@ -30,6 +31,7 @@ import org.apache.stanbol.contenthub.ser
import org.apache.stanbol.contenthub.servicesapi.search.execution.Keyword;
import org.apache.stanbol.contenthub.servicesapi.search.execution.QueryKeyword;
import org.apache.stanbol.contenthub.servicesapi.search.execution.SearchContext;
+import org.apache.stanbol.contenthub.servicesapi.search.execution.Keyword.RelatedKeywordSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -40,7 +42,11 @@ import com.hp.hpl.jena.query.QuerySoluti
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.larq.IndexLARQ;
import com.hp.hpl.jena.query.larq.LARQ;
+import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
+import com.hp.hpl.jena.rdf.model.Resource;
+import com.hp.hpl.jena.rdf.model.ResourceFactory;
+import com.hp.hpl.jena.rdf.model.Statement;
/**
*
@@ -56,9 +62,6 @@ public class OntologyResourceSearchEngin
public void search(SearchContext searchContext) throws SearchEngineException {
for (QueryKeyword qk : searchContext.getQueryKeyWords()) {
searchForKeyword(qk, searchContext);
- for (Keyword kw : qk.getRelatedKeywords()) {
- searchForKeyword(kw, searchContext);
- }
}
}
@@ -80,6 +83,14 @@ public class OntologyResourceSearchEngin
LARQ.setDefaultIndex(individualQExec.getContext(), index);
result = individualQExec.execSelect();
processIndividualResultSet(result, kw, searchContext);
+
+ // CMS Objects
+ query = QueryFactory.getCMSObjectQuery(kw.getKeyword());
+ QueryExecution cmsObjectQueryExec = QueryExecutionFactory.create(query,
+ searchContext.getSearchModel());
+ LARQ.setDefaultIndex(cmsObjectQueryExec.getContext(), index);
+ result = cmsObjectQueryExec.execSelect();
+ processCMSObjectResultSet(result, kw, searchContext);
} else {
logger.warn("Keyword Engine skipped since no index for search model");
}
@@ -120,4 +131,32 @@ public class OntologyResourceSearchEngin
ClosureHelper.getInstance(context).computeIndividualClosure(ir, 6, 1.5, kw);
}
}
+
+    /**
+     * Turns the CMS object hits of a text match query into related keywords. For every URI resource bound
+     * to ?cmsobject the score is remembered; afterwards, for each hit that has a cms#name statement in
+     * the search model, a keyword is created from the first such name and the closure over the
+     * cms#parentRef property is computed starting from the hit.
+     *
+     * @param result
+     *            the result set of the CMS object query, binding ?cmsobject and ?score
+     * @param kw
+     *            the keyword the query was executed for
+     * @param context
+     *            the search context providing the search model and the keyword factory
+     */
+    private void processCMSObjectResultSet(ResultSet result, Keyword kw, SearchContext context) {
+        // First pass: remember the score of every hit that is a URI resource.
+        Map<String,Double> scoresByUri = new HashMap<String,Double>();
+        while (result.hasNext()) {
+            QuerySolution solution = result.nextSolution();
+            RDFNode cmsObject = solution.get("cmsobject");
+            double score = solution.getLiteral("score").getDouble();
+            if (!cmsObject.isURIResource()) {
+                continue;
+            }
+            scoresByUri.put(cmsObject.asResource().getURI(), score);
+        }
+
+        Property subsumptionProp = ResourceFactory
+                .createProperty("http://www.apache.org/stanbol/cms#parentRef");
+        Property nameProp = ResourceFactory.createProperty("http://www.apache.org/stanbol/cms#name");
+
+        // Second pass: create a keyword and compute the closure for every hit that carries a name.
+        for (Map.Entry<String,Double> hit : scoresByUri.entrySet()) {
+            Resource keywordResource = ResourceFactory.createResource(hit.getKey());
+            List<Statement> nameStatements = context.getSearchModel()
+                    .listStatements(keywordResource, nameProp, (RDFNode) null).toList();
+            if (nameStatements.size() == 0) {
+                continue;
+            }
+            String keyword = nameStatements.get(0).getString();
+            context.getFactory().createKeyword(keyword, hit.getValue(), kw.getRelatedQueryKeyword(),
+                RelatedKeywordSource.ONTOLOGYRESOURCE.getName());
+            double initialScore = hit.getValue();
+            ClosureHelper.getInstance(context).computeClosureWithProperty(keywordResource,
+                subsumptionProp, 2, initialScore, 1.5, kw);
+        }
+    }
}
Modified: incubator/stanbol/trunk/contenthub/search/engines/ontologyresource/src/main/java/org/apache/stanbol/contenthub/search/engines/ontologyresource/QueryFactory.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/search/engines/ontologyresource/src/main/java/org/apache/stanbol/contenthub/search/engines/ontologyresource/QueryFactory.java?rev=1211477&r1=1211476&r2=1211477&view=diff
==============================================================================
--- incubator/stanbol/trunk/contenthub/search/engines/ontologyresource/src/main/java/org/apache/stanbol/contenthub/search/engines/ontologyresource/QueryFactory.java (original)
+++ incubator/stanbol/trunk/contenthub/search/engines/ontologyresource/src/main/java/org/apache/stanbol/contenthub/search/engines/ontologyresource/QueryFactory.java Wed Dec 7 15:41:42 2011
@@ -40,6 +40,7 @@ public final class QueryFactory {
// private static final String SKOS = "PREFIX skos: <http://www.w3.org/2004/02/skos/core#>";
private static final String PF = "PREFIX pf: <http://jena.hpl.hp.com/ARQ/property#>";
private static final String SEARCH = "PREFIX ss: <" + SearchVocabulary.getUri() + ">";
+ private static final String CMS = "PREFIX cms: <http://www.apache.org/stanbol/cms#>";
public static final Query getClassQuery(String keyword) {
@@ -70,6 +71,21 @@ public final class QueryFactory {
sb.append("}");
return com.hp.hpl.jena.query.QueryFactory.create(sb.toString());
}
+
+    /**
+     * Builds the SPARQL query that selects the resources typed as <code>cms:CMSObject</code> whose
+     * <code>ss:hasLocalName</code> value matches the given keyword through <code>pf:textMatch</code>.
+     * The query binds <code>?cmsobject</code> and <code>?score</code>.
+     * 
+     * @param keyword
+     *            the keyword to match; it is passed through <code>normalizeKeyword</code> before being
+     *            embedded into the query string
+     * @return the parsed query
+     */
+    public static final Query getCMSObjectQuery(String keyword) {
+        // the leading '+' is part of the text match pattern
+        String textMatchPattern = "'+" + normalizeKeyword(keyword) + "'";
+
+        StringBuilder sparql = new StringBuilder();
+        // prefix declarations, one per line
+        sparql.append(RDF).append("\n")
+              .append(OWL).append("\n")
+              .append(PF).append("\n")
+              .append(SEARCH).append("\n")
+              .append(CMS).append("\n");
+        // query body
+        sparql.append("SELECT ?cmsobject ?score WHERE {\n")
+              .append("\t?cmsobject rdf:type cms:CMSObject.\n")
+              .append("\t?cmsobject ss:hasLocalName ?name.\n")
+              .append("\t(?name ?score) pf:textMatch ").append(textMatchPattern).append(".\n")
+              .append("}");
+
+        String queryString = sparql.toString();
+        return com.hp.hpl.jena.query.QueryFactory.create(queryString);
+    }
private static String normalizeKeyword(String keyword) {
return keyword.replace("'", "");
Modified: incubator/stanbol/trunk/contenthub/search/engines/solr/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/search/engines/solr/pom.xml?rev=1211477&r1=1211476&r2=1211477&view=diff
==============================================================================
--- incubator/stanbol/trunk/contenthub/search/engines/solr/pom.xml (original)
+++ incubator/stanbol/trunk/contenthub/search/engines/solr/pom.xml Wed Dec 7 15:41:42 2011
@@ -1,20 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
@@ -61,25 +55,25 @@
<artifactId>org.apache.stanbol.contenthub.core</artifactId>
</dependency>
<dependency>
- <groupId>org.apache.stanbol</groupId>
- <artifactId>org.apache.stanbol.commons.solr.core</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.stanbol</groupId>
- <artifactId>org.apache.stanbol.commons.solr.managed</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-compress</artifactId>
- </dependency>
- <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.commons.solr.core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.commons.solr.managed</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-compress</artifactId>
+ </dependency>
+ <dependency>
<groupId>org.apache.felix</groupId>
<artifactId>org.apache.felix.scr.annotations</artifactId>
</dependency>
- <dependency>
- <groupId>org.osgi</groupId>
- <artifactId>org.osgi.core</artifactId>
- </dependency>
+ <dependency>
+ <groupId>org.osgi</groupId>
+ <artifactId>org.osgi.core</artifactId>
+ </dependency>
<dependency>
<groupId>org.osgi</groupId>
<artifactId>org.osgi.compendium</artifactId>
@@ -98,12 +92,12 @@
</dependency>
<!-- Solr Bundles -->
<dependency>
- <groupId>org.apache.solr</groupId>
- <artifactId>solr-solrj</artifactId>
- </dependency>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-solrj</artifactId>
+ </dependency>
<dependency>
- <groupId>org.apache.solr</groupId>
- <artifactId>solr-core</artifactId>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-core</artifactId>
</dependency>
</dependencies>
</project>
Modified: incubator/stanbol/trunk/contenthub/search/engines/solr/src/main/java/org/apache/stanbol/contenthub/search/engines/solr/SolrSearchEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/search/engines/solr/src/main/java/org/apache/stanbol/contenthub/search/engines/solr/SolrSearchEngine.java?rev=1211477&r1=1211476&r2=1211477&view=diff
==============================================================================
--- incubator/stanbol/trunk/contenthub/search/engines/solr/src/main/java/org/apache/stanbol/contenthub/search/engines/solr/SolrSearchEngine.java (original)
+++ incubator/stanbol/trunk/contenthub/search/engines/solr/src/main/java/org/apache/stanbol/contenthub/search/engines/solr/SolrSearchEngine.java Wed Dec 7 15:41:42 2011
@@ -37,6 +37,7 @@ import org.apache.stanbol.commons.solr.I
import org.apache.stanbol.commons.solr.RegisteredSolrServerTracker;
import org.apache.stanbol.commons.solr.managed.ManagedSolrServer;
import org.apache.stanbol.contenthub.core.search.execution.SearchContextImpl;
+import org.apache.stanbol.contenthub.core.store.SolrStoreImpl;
import org.apache.stanbol.contenthub.servicesapi.search.engine.EngineProperties;
import org.apache.stanbol.contenthub.servicesapi.search.engine.SearchEngine;
import org.apache.stanbol.contenthub.servicesapi.search.engine.SearchEngineException;
@@ -47,6 +48,7 @@ import org.apache.stanbol.contenthub.ser
import org.apache.stanbol.contenthub.servicesapi.search.execution.SearchContext;
import org.apache.stanbol.contenthub.servicesapi.search.execution.SearchContextFactory;
import org.apache.stanbol.contenthub.servicesapi.search.vocabulary.SearchVocabulary;
+import org.apache.stanbol.contenthub.servicesapi.store.vocabulary.SolrVocabulary;
import org.apache.stanbol.contenthub.servicesapi.store.vocabulary.SolrVocabulary.SolrFieldName;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
@@ -78,41 +80,42 @@ public class SolrSearchEngine implements
properties.put(PROCESSING_ORDER, PROCESSING_POST);
}
- private final static String SERVER_NAME = "contenthub";
-
protected RegisteredSolrServerTracker serverTracker;
+
@Reference
- ManagedSolrServer solrDirectoryManager;
+ ManagedSolrServer managedSolrServer;
/**
- * Tries to connect EmbeddedSolr at startup, if can not, server becomes null and no query is executed on server
+     * Tries to connect to the EmbeddedSolr at startup. If it cannot, the server becomes null and no query is
+     * executed on the server.
+ *
* @param cc
*/
@Activate
- protected void activate(ComponentContext cc) {
+ public void activate(ComponentContext cc) {
try {
- if (!solrDirectoryManager.isManagedIndex(SERVER_NAME)) {
- solrDirectoryManager.createSolrIndex(SERVER_NAME, SERVER_NAME, null);
- }
- serverTracker = new RegisteredSolrServerTracker(cc.getBundleContext(),
- new IndexReference(solrDirectoryManager.getServerName(), SERVER_NAME));
+ serverTracker = new RegisteredSolrServerTracker(cc.getBundleContext(), new IndexReference(
+ managedSolrServer.getServerName(), SolrStoreImpl.SOLR_SERVER_NAME));
serverTracker.open();
} catch (Exception e) {
- logger.warn("Could not get the EmbeddedSolr Instance at location : {}", SERVER_NAME, e);
+ logger.warn("Could not get the EmbeddedSolr Instance at location : {}",
+ SolrStoreImpl.SOLR_SERVER_NAME, e);
}
}
+
@Deactivate
protected void deactivate(ComponentContext cc) {
- if(serverTracker != null){
+ if (serverTracker != null) {
serverTracker.close();
serverTracker = null;
}
- solrDirectoryManager = null;
+ managedSolrServer = null;
}
- protected SolrServer getServer(){
+ protected SolrServer getServer() {
return serverTracker != null ? serverTracker.getService() : null;
}
+
@Override
/**
* gets the keywords from search context and then queries solr with these keywords and facet constraints,
@@ -120,17 +123,14 @@ public class SolrSearchEngine implements
* After searching for all keywords, omits the results founded by other engines and having non matching field constraints
*/
public void search(SearchContext searchContext) throws SearchEngineException {
- SolrServer server = getServer();
- if (server == null) {
+ SolrServer solrServer = getServer();
+ if (solrServer == null) {
logger.warn("No EmbeddedSolr, so SolrSearchEngine does not work");
} else {
for (QueryKeyword qk : searchContext.getQueryKeyWords()) {
- searchForKeyword(server,qk, searchContext);
- for (Keyword kw : qk.getRelatedKeywords()) {
- searchForKeyword(server, kw, searchContext);
- }
+ searchForKeyword(solrServer, qk, searchContext);
}
-
+
/*
* if (searchContext.getConstraints() != null && !searchContext.getConstraints().isEmpty()) {
* omitNonMatchingResult(searchContext); }
@@ -143,14 +143,14 @@ public class SolrSearchEngine implements
* the keyword to use in query
* @param searchContext
*/
- private void searchForKeyword(SolrServer server, Keyword kw, SearchContext searchContext) {
+ private void searchForKeyword(SolrServer solrServer, Keyword kw, SearchContext searchContext) {
String keyword = kw.getKeyword();
SolrQuery query = SolrSearchEngineHelper.keywordQueryWithFacets(keyword,
searchContext.getConstraints());
// Finding document resources by querying keyword with the facets
try {
- QueryResponse solrResult = server.query(query);
+ QueryResponse solrResult = solrServer.query(query);
processSolrResult(searchContext, kw, solrResult);
} catch (SolrServerException e) {
logger.warn("Server could not be queried", e);
@@ -167,7 +167,7 @@ public class SolrSearchEngine implements
if (className != null) {
query = SolrSearchEngineHelper.keywordQueryWithFacets(className,
searchContext.getConstraints());
- QueryResponse solrResult = server.query(query);
+ QueryResponse solrResult = solrServer.query(query);
processSolrResult(searchContext, kw, solrResult);
} else {
logger.info("Name of class could not be extracted from from class Resource : ",
@@ -189,7 +189,7 @@ public class SolrSearchEngine implements
if (individualName != null) {
query = SolrSearchEngineHelper.keywordQueryWithFacets(individualName,
searchContext.getConstraints());
- QueryResponse solrResult = server.query(query);
+ QueryResponse solrResult = solrServer.query(query);
processSolrResult(searchContext, kw, solrResult);
} else {
logger.info("Name of individual could not be extracted from individual Resource : ",
@@ -219,16 +219,27 @@ public class SolrSearchEngine implements
* cmsId == null ? "" : cmsId;
*/
- String selectionText = (String) resultDoc.getFieldValue(SolrFieldName.CONTENT.toString());
+ String contenthubContent = (String) resultDoc.getFieldValue(SolrFieldName.CONTENT.toString());
+ String creationDate = resultDoc.getFieldValue(SolrFieldName.CREATIONDATE.toString()).toString();
+ if (contenthubContent.length() > 50) {
+ contenthubContent = contenthubContent.substring(0, 50);
+ }
+
+ // TODO: This is not a good way of preparing html.
+ String selectionText = "id : " + contenthubId + "\n" + "content : "
+ + contenthubContent.replaceAll("\\n", "") + " ..." + "\n"
+ + "Creation Date: " + creationDate;
+
+ String documentTitle = getMeaningfulDocumentName(contenthubId, resultDoc);
// score of the keyword is used as a weight for newly found document
factory.createDocumentResource(contenthubId, 1.0, keyword.getScore() * score, keyword,
- selectionText/* , cmsId */);
+ selectionText, documentTitle/* , cmsId */);
}
}
@SuppressWarnings("unused")
- private void omitNonMatchingResult(SolrServer server, SearchContext searchContext) {
+ private void omitNonMatchingResult(SolrServer solrServer, SearchContext searchContext) {
OntModel contextModel = (SearchContextImpl) searchContext;
ResIterator docResources = contextModel.listResourcesWithProperty(RDF.type,
SearchVocabulary.DOCUMENT_RESOURCE);
@@ -238,7 +249,7 @@ public class SolrSearchEngine implements
.keywordQueryWithFacets("*:*", searchContext.getConstraints());
QueryResponse solrResult = null;
try {
- solrResult = server.query(query);
+ solrResult = solrServer.query(query);
} catch (SolrServerException e) {
logger.warn("Error while querying with query : {} ", query, e);
}
@@ -293,4 +304,19 @@ public class SolrSearchEngine implements
return properties;
}
+    /**
+     * Picks a human readable name for a search result.
+     * <p>
+     * The title field is preferred, then <code>name_t</code>, then <code>subject_t</code>. The first of
+     * these fields that holds a value wins; for a multi valued field its first value is used. If none of
+     * them holds a value, the document id is returned.
+     * 
+     * @param docId
+     *            id of the document, used as the fallback name
+     * @param resultDoc
+     *            the Solr document to read the candidate fields from
+     * @return the name to display for the document
+     */
+    private String getMeaningfulDocumentName(String docId, SolrDocument resultDoc) {
+        String[] titleCandidates = {SolrFieldName.TITLE.toString(), "name_t", "subject_t"};
+        for (String fieldName : titleCandidates) {
+            // getFirstValue copes with single valued, multi valued and missing fields alike, so no
+            // unchecked cast to List is needed and an empty field cannot cause an exception
+            Object firstValue = resultDoc.getFirstValue(fieldName);
+            if (firstValue != null) {
+                return firstValue.toString();
+            }
+        }
+        return docId;
+    }
}
Modified: incubator/stanbol/trunk/contenthub/search/engines/solr/src/main/java/org/apache/stanbol/contenthub/search/engines/solr/SolrSearchEngineHelper.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/search/engines/solr/src/main/java/org/apache/stanbol/contenthub/search/engines/solr/SolrSearchEngineHelper.java?rev=1211477&r1=1211476&r2=1211477&view=diff
==============================================================================
--- incubator/stanbol/trunk/contenthub/search/engines/solr/src/main/java/org/apache/stanbol/contenthub/search/engines/solr/SolrSearchEngineHelper.java (original)
+++ incubator/stanbol/trunk/contenthub/search/engines/solr/src/main/java/org/apache/stanbol/contenthub/search/engines/solr/SolrSearchEngineHelper.java Wed Dec 7 15:41:42 2011
@@ -23,6 +23,7 @@ import java.util.Map.Entry;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.util.ClientUtils;
+import org.apache.stanbol.contenthub.servicesapi.store.vocabulary.SolrVocabulary;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -50,7 +51,8 @@ public class SolrSearchEngineHelper {
String fieldName = ClientUtils.escapeQueryChars(entry.getKey());
for (Object value : entry.getValue()) {
queryString = queryString + and + fieldName + queryDelimiter
- + ClientUtils.escapeQueryChars((String) value);
+ + (SolrVocabulary.isNameRangeField(fieldName) ?
+ (String) value : ClientUtils.escapeQueryChars((String) value));
}
}
} catch (Exception e) {
Added: incubator/stanbol/trunk/contenthub/search/engines/wordnet/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/search/engines/wordnet/pom.xml?rev=1211477&view=auto
==============================================================================
--- incubator/stanbol/trunk/contenthub/search/engines/wordnet/pom.xml (added)
+++ incubator/stanbol/trunk/contenthub/search/engines/wordnet/pom.xml Wed Dec 7 15:41:42 2011
@@ -0,0 +1,76 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.contenthub.parent</artifactId>
+ <version>0.9.0-incubating-SNAPSHOT</version>
+ <relativePath>../../../parent</relativePath>
+ </parent>
+
+ <artifactId>org.apache.stanbol.contenthub.search.engines.wordnet</artifactId>
+ <packaging>bundle</packaging>
+
+ <name>Apache Stanbol Search Wordnet Engine</name>
+
+	<description>In the preprocessing phase, finds related keywords and adds them to the
+		keyword list</description>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-scr-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.contenthub.servicesapi</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.contenthub.core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>org.apache.felix.scr.annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.osgi</groupId>
+ <artifactId>org.osgi.compendium</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>edu.smu.tspell.jaws</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+ </dependencies>
+</project>
Added: incubator/stanbol/trunk/contenthub/search/engines/wordnet/src/main/java/org/apache/stanbol/contenthub/search/engines/wordnet/Scored.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/search/engines/wordnet/src/main/java/org/apache/stanbol/contenthub/search/engines/wordnet/Scored.java?rev=1211477&view=auto
==============================================================================
--- incubator/stanbol/trunk/contenthub/search/engines/wordnet/src/main/java/org/apache/stanbol/contenthub/search/engines/wordnet/Scored.java (added)
+++ incubator/stanbol/trunk/contenthub/search/engines/wordnet/src/main/java/org/apache/stanbol/contenthub/search/engines/wordnet/Scored.java Wed Dec 7 15:41:42 2011
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.stanbol.contenthub.search.engines.wordnet;
+
+/**
+ * Immutable pair of a keyword and the score assigned to it.
+ * 
+ * @author cihan
+ * 
+ */
+public class Scored {
+
+    private final double score;
+    private final String keyword;
+
+    /**
+     * @param keyword
+     *            the keyword
+     * @param score
+     *            the score of the keyword
+     */
+    public Scored(String keyword, double score) {
+        this.score = score;
+        this.keyword = keyword;
+    }
+
+    /**
+     * @return the keyword passed to the constructor
+     */
+    public String getKeyword() {
+        return keyword;
+    }
+
+    /**
+     * @return the score passed to the constructor
+     */
+    public double getScore() {
+        return score;
+    }
+
+}
Added: incubator/stanbol/trunk/contenthub/search/engines/wordnet/src/main/java/org/apache/stanbol/contenthub/search/engines/wordnet/WordnetClient.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/search/engines/wordnet/src/main/java/org/apache/stanbol/contenthub/search/engines/wordnet/WordnetClient.java?rev=1211477&view=auto
==============================================================================
--- incubator/stanbol/trunk/contenthub/search/engines/wordnet/src/main/java/org/apache/stanbol/contenthub/search/engines/wordnet/WordnetClient.java (added)
+++ incubator/stanbol/trunk/contenthub/search/engines/wordnet/src/main/java/org/apache/stanbol/contenthub/search/engines/wordnet/WordnetClient.java Wed Dec 7 15:41:42 2011
@@ -0,0 +1,274 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.stanbol.contenthub.search.engines.wordnet;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.stanbol.contenthub.servicesapi.search.engine.SearchEngineException;
+
+import edu.smu.tspell.wordnet.NounSynset;
+import edu.smu.tspell.wordnet.Synset;
+import edu.smu.tspell.wordnet.SynsetType;
+import edu.smu.tspell.wordnet.VerbSynset;
+import edu.smu.tspell.wordnet.WordNetDatabase;
+import edu.smu.tspell.wordnet.WordNetException;
+
+/**
+ * Looks up the words related to a keyword in a WordNet database and assigns a score to each of them.
+ * <p>
+ * The list being built and the maximum score of the running lookup are kept in instance fields, so a
+ * single instance must not be used for several lookups at the same time.
+ * 
+ * @author anil.sinaci
+ * @author cihan
+ * 
+ */
+public class WordnetClient {
+    /** Only the word forms of the synsets of the keyword itself are returned. */
+    public static final int EXPANSION_0 = 1;
+    /** The noun and verb relations of the synsets of the keyword are followed once. */
+    public static final int EXPANSION_1 = 2;
+    /** The noun and verb relations are followed twice. */
+    public static final int EXPANSION_2 = 3;
+    /** The noun and verb relations are followed three times. */
+    public static final int EXPANSION_3 = 4;
+
+    private List<Scored> scoredList;
+    private Double maxScore;
+    private WordNetDatabase wordnetDatabase;
+    private double degradingFactor;
+    private int expansionLevel;
+
+    /**
+     * Creates a client on top of the file based WordNet database. As a side effect the system property
+     * <code>wordnet.database.dir</code> is set to the given directory.
+     * 
+     * @param wordnetDatabase
+     *            value for the <code>wordnet.database.dir</code> system property
+     * @param expansionLevel
+     *            one of the <code>EXPANSION_*</code> constants; must not be <code>null</code>
+     * @param degradingFactor
+     *            divisor applied to the maximum score of related words; must not be <code>null</code>
+     */
+    public WordnetClient(String wordnetDatabase, Integer expansionLevel, Double degradingFactor) {
+        System.setProperty("wordnet.database.dir", wordnetDatabase);
+        this.wordnetDatabase = WordNetDatabase.getFileInstance();
+        this.degradingFactor = degradingFactor;
+        this.expansionLevel = expansionLevel;
+    }
+
+    /**
+     * Drops every entry of the scored list whose keyword already occurred earlier in the list, keeping
+     * the first occurrence and the order of the remaining entries.
+     */
+    private void removeDuplicates() {
+        Set<String> seenKeywords = new HashSet<String>();
+        List<Scored> uniqueScored = new ArrayList<Scored>(scoredList.size());
+        for (Scored scored : scoredList) {
+            // Set.add returns false when the keyword was already seen
+            if (seenKeywords.add(scored.getKeyword())) {
+                uniqueScored.add(scored);
+            }
+        }
+        scoredList = uniqueScored;
+    }
+
+    /**
+     * Finds the words related to the given keyword and scores them.
+     * <p>
+     * The word forms of the synsets of the keyword get the score <code>maxScore / degradingFactor</code>.
+     * Unless the expansion level is {@link #EXPANSION_0}, the noun and verb relations are then followed
+     * <code>expansionLevel - 1</code> times; words found in the n-th step get the score
+     * <code>maxScore / (degradingFactor * n)</code> and duplicate keywords are removed from the result.
+     * For {@link #EXPANSION_0} the list is returned as collected, without removing duplicates.
+     * 
+     * @param keyword
+     *            the keyword to look up
+     * @param maxScore
+     *            the score the related words are derived from
+     * @return the related words together with their scores
+     * @throws SearchEngineException
+     *             if the WordNet database cannot be accessed
+     */
+    public final List<Scored> getScoredWordnetResources(String keyword, double maxScore) throws SearchEngineException {
+        scoredList = new ArrayList<Scored>();
+        this.maxScore = maxScore;
+        // Every database access of the lookup happens inside this try block, so a WordNetException is
+        // always reported as SearchEngineException, not only for the first call.
+        try {
+            for (Synset synset : wordnetDatabase.getSynsets(keyword)) {
+                for (String wordForm : synset.getWordForms()) {
+                    scoredList.add(new Scored(wordForm, maxScore / degradingFactor));
+                }
+            }
+
+            if (expansionLevel == WordnetClient.EXPANSION_0) {
+                return scoredList;
+            }
+
+            // TODO adjectives and adverbs not implemented yet
+            Synset[] nounSynsets = wordnetDatabase.getSynsets(keyword, SynsetType.NOUN);
+            Synset[] verbSynsets = wordnetDatabase.getSynsets(keyword, SynsetType.VERB);
+
+            // NOTE(review): words found in step 1 get the same score as the direct word forms above,
+            // because the divisor is degradingFactor * 1 -- confirm that this is intended.
+            for (int level = 1; level < expansionLevel; level++) {
+                nounSynsets = handleNouns(nounSynsets, level);
+                verbSynsets = handleVerbs(verbSynsets, level);
+            }
+        } catch (WordNetException e) {
+            throw new SearchEngineException("Error accessing wordnet database", e);
+        }
+
+        removeDuplicates();
+        return scoredList;
+    }
+
+    /**
+     * Adds the word forms of the hypernyms, hyponyms, holonyms and meronyms of the given noun synsets to
+     * the scored list.
+     * 
+     * @param nounSynsets
+     *            the noun synsets to expand, may be <code>null</code>
+     * @param currentExpansionLevel
+     *            the expansion step, used to degrade the score
+     * @return the related synsets that were visited, or <code>null</code> if the input was
+     *         <code>null</code>
+     */
+    private NounSynset[] handleNouns(Synset[] nounSynsets, int currentExpansionLevel) {
+
+        if (nounSynsets == null) {
+            return null;
+        }
+
+        List<NounSynset> newNounSynset = new ArrayList<NounSynset>();
+
+        // TODO: Not all methods of a NounSynset is called.
+
+        for (Synset synset : nounSynsets) {
+            NounSynset nounSynset = (NounSynset) synset;
+
+            handleSynset(nounSynset.getHypernyms(), newNounSynset, currentExpansionLevel);
+            handleSynset(nounSynset.getInstanceHypernyms(), newNounSynset, currentExpansionLevel);
+            handleSynset(nounSynset.getHyponyms(), newNounSynset, currentExpansionLevel);
+            handleSynset(nounSynset.getInstanceHyponyms(), newNounSynset, currentExpansionLevel);
+            handleSynset(nounSynset.getMemberHolonyms(), newNounSynset, currentExpansionLevel);
+            handleSynset(nounSynset.getSubstanceHolonyms(), newNounSynset, currentExpansionLevel);
+            handleSynset(nounSynset.getPartHolonyms(), newNounSynset, currentExpansionLevel);
+            handleSynset(nounSynset.getMemberMeronyms(), newNounSynset, currentExpansionLevel);
+            handleSynset(nounSynset.getSubstanceMeronyms(), newNounSynset, currentExpansionLevel);
+            handleSynset(nounSynset.getPartMeronyms(), newNounSynset, currentExpansionLevel);
+        }
+
+        return newNounSynset.toArray(new NounSynset[newNounSynset.size()]);
+    }
+
+    /**
+     * Adds the word forms of every given synset to the scored list and appends the synsets to the
+     * accumulator.
+     * 
+     * @param parts
+     *            the synsets to process
+     * @param accumulator
+     *            receives the processed synsets
+     * @param currentExpansionLevel
+     *            the expansion step, used to degrade the score
+     */
+    private <T extends Synset> void handleSynset(T[] parts, List<T> accumulator, int currentExpansionLevel) {
+        for (T part : parts) {
+            addWordForms(part.getWordForms(), currentExpansionLevel);
+            accumulator.add(part);
+        }
+    }
+
+    /**
+     * Adds the word forms of the hypernyms, troponyms, entailments and outcomes of the given verb
+     * synsets to the scored list.
+     * 
+     * @param verbSynsets
+     *            the verb synsets to expand, may be <code>null</code>
+     * @param currentExpansionLevel
+     *            the expansion step, used to degrade the score
+     * @return the related synsets that were visited, or <code>null</code> if the input was
+     *         <code>null</code>
+     */
+    private VerbSynset[] handleVerbs(Synset[] verbSynsets, int currentExpansionLevel) {
+
+        if (verbSynsets == null) {
+            return null;
+        }
+
+        List<VerbSynset> newVerbSynset = new ArrayList<VerbSynset>();
+
+        // TODO: Not all methods of a VerbSynset is called.
+
+        for (Synset synset : verbSynsets) {
+            VerbSynset verbSynset = (VerbSynset) synset;
+
+            handleSynset(verbSynset.getHypernyms(), newVerbSynset, currentExpansionLevel);
+            handleSynset(verbSynset.getTroponyms(), newVerbSynset, currentExpansionLevel);
+            handleSynset(verbSynset.getEntailments(), newVerbSynset, currentExpansionLevel);
+            handleSynset(verbSynset.getOutcomes(), newVerbSynset, currentExpansionLevel);
+        }
+
+        return newVerbSynset.toArray(new VerbSynset[newVerbSynset.size()]);
+    }
+
+    /**
+     * Appends the given word forms to the scored list with the score
+     * <code>maxScore / (degradingFactor * currentExpansionLevel)</code>.
+     * 
+     * @param wordForms
+     *            the words to add
+     * @param currentExpansionLevel
+     *            the expansion step the words were found in
+     */
+    private void addWordForms(String[] wordForms, int currentExpansionLevel) {
+        for (String wordForm : wordForms) {
+            scoredList.add(new Scored(wordForm, maxScore / (degradingFactor * currentExpansionLevel)));
+        }
+    }
+}
Added: incubator/stanbol/trunk/contenthub/search/engines/wordnet/src/main/java/org/apache/stanbol/contenthub/search/engines/wordnet/WordnetEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/search/engines/wordnet/src/main/java/org/apache/stanbol/contenthub/search/engines/wordnet/WordnetEngine.java?rev=1211477&view=auto
==============================================================================
--- incubator/stanbol/trunk/contenthub/search/engines/wordnet/src/main/java/org/apache/stanbol/contenthub/search/engines/wordnet/WordnetEngine.java (added)
+++ incubator/stanbol/trunk/contenthub/search/engines/wordnet/src/main/java/org/apache/stanbol/contenthub/search/engines/wordnet/WordnetEngine.java Wed Dec 7 15:41:42 2011
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.stanbol.contenthub.search.engines.wordnet;
+
+import java.util.Dictionary;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.PropertyOption;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.contenthub.servicesapi.search.engine.EngineProperties;
+import org.apache.stanbol.contenthub.servicesapi.search.engine.SearchEngine;
+import org.apache.stanbol.contenthub.servicesapi.search.engine.SearchEngineException;
+import org.apache.stanbol.contenthub.servicesapi.search.execution.QueryKeyword;
+import org.apache.stanbol.contenthub.servicesapi.search.execution.SearchContext;
+import org.apache.stanbol.contenthub.servicesapi.search.execution.SearchContextFactory;
+import org.apache.stanbol.contenthub.servicesapi.search.execution.Keyword.RelatedKeywordSource;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Search engine that looks up words related to the keywords of a query through a
+ * {@link WordnetClient} and registers every finding in the search context as a keyword related to
+ * the query keyword it was derived from, passing the name of {@link RelatedKeywordSource#WORDNET}.
+ * <p>
+ * On activation the engine puts {@link EngineProperties#PROCESSING_PRE} into its engine properties
+ * under {@link EngineProperties#PROCESSING_ORDER}.
+ *
+ * @author cihan
+ */
+@Component(metatype = true)
+@Service
+public class WordnetEngine implements SearchEngine, EngineProperties {
+    private static final Logger LOGGER = LoggerFactory.getLogger(WordnetEngine.class);
+
+    /** Configuration key of the Wordnet database path; activation fails if it is not a non-empty string. */
+    @Property(name = WordnetEngine.WORDNET_DATABASE, value = "")
+    public static final String WORDNET_DATABASE = "org.apache.stanbol.contenthub.search.engines.wordnet.Wordnet.database";
+
+    /** Configuration key of the expansion level handed to the {@link WordnetClient}. */
+    @Property(name = WordnetEngine.WORDNET_EXPANSION_LEVEL, value = "1", options = {
+            @PropertyOption(name = "1", value = "1"),
+            @PropertyOption(name = "2", value = "2"),
+            @PropertyOption(name = "3", value = "3"),
+            @PropertyOption(name = "4", value = "4")})
+    public static final String WORDNET_EXPANSION_LEVEL = "org.apache.stanbol.contenthub.search.engines.wordnet.Wordnet.expansionLevel";
+
+    /** Configuration key of the degrading factor handed to the {@link WordnetClient}. */
+    @Property(name = WordnetEngine.WORDNET_DEGRADING_FACTOR, value = "1.0", options = {
+            @PropertyOption(name = "1.0", value = "1.0"),
+            @PropertyOption(name = "1.9", value = "1.9"),
+            @PropertyOption(name = "2.0", value = "2.0")})
+    public static final String WORDNET_DEGRADING_FACTOR = "org.apache.stanbol.contenthub.search.engines.wordnet.Wordnet.degradingFactor";
+
+    /** Fallback used when no expansion level is configured; same value as the declared property default. */
+    private static final String DEFAULT_EXPANSION_LEVEL = "1";
+
+    /** Fallback used when no degrading factor is configured; same value as the declared property default. */
+    private static final String DEFAULT_DEGRADING_FACTOR = "1.0";
+
+    private WordnetClient wordnetClient;
+    private final Map<String,Object> engineProperties = new HashMap<String,Object>();
+
+    /**
+     * Verifies that the mandatory configuration is present.
+     *
+     * @param properties
+     *            the component configuration
+     * @throws IllegalArgumentException
+     *             if the database path is missing, not a string, or empty
+     */
+    private void checkProperties(@SuppressWarnings("rawtypes") Dictionary properties) {
+        Object databasePath = properties.get(WORDNET_DATABASE);
+        if (!(databasePath instanceof String) || "".equals(databasePath)) {
+            throw new IllegalArgumentException("Wordnet database path can not be empty");
+        }
+    }
+
+    /**
+     * Reads a configuration value as trimmed text without assuming how it is stored. Values that are
+     * not strings (for example an {@code Integer}) are converted with {@code toString()} instead of
+     * being cast.
+     *
+     * @param properties
+     *            the component configuration
+     * @param key
+     *            the configuration key to look up
+     * @param defaultValue
+     *            the text returned when the key is absent or its value is blank
+     * @return the trimmed textual value, or {@code defaultValue}
+     */
+    private static String readProperty(@SuppressWarnings("rawtypes") Dictionary properties,
+                                       String key,
+                                       String defaultValue) {
+        Object value = properties.get(key);
+        if (value == null) {
+            return defaultValue;
+        }
+        String text = value.toString().trim();
+        return text.length() == 0 ? defaultValue : text;
+    }
+
+    /**
+     * Validates the configuration, creates the {@link WordnetClient} and records the processing order.
+     *
+     * @param cc
+     *            the component context supplying the configuration
+     * @throws IllegalArgumentException
+     *             if the database path is missing or empty
+     * @throws NumberFormatException
+     *             if the expansion level or the degrading factor is configured but not numeric
+     */
+    @Activate
+    public final void activate(ComponentContext cc) {
+        @SuppressWarnings("rawtypes")
+        Dictionary properties = cc.getProperties();
+        checkProperties(properties);
+        String wordnetDatabase = (String) properties.get(WORDNET_DATABASE);
+        // Absent or non-String values no longer fail with a ClassCastException or a parse of null.
+        Integer expansionLevel = Integer.valueOf(
+            readProperty(properties, WORDNET_EXPANSION_LEVEL, DEFAULT_EXPANSION_LEVEL));
+        Double degradingFactor = Double.valueOf(
+            readProperty(properties, WORDNET_DEGRADING_FACTOR, DEFAULT_DEGRADING_FACTOR));
+        wordnetClient = new WordnetClient(wordnetDatabase, expansionLevel, degradingFactor);
+        this.engineProperties.put(EngineProperties.PROCESSING_ORDER, EngineProperties.PROCESSING_PRE);
+    }
+
+    /**
+     * For every query keyword of the context, fetches the scored Wordnet findings for the normalized
+     * keyword and creates one related keyword per finding.
+     *
+     * @param searchContext
+     *            the context providing the query keywords and the keyword factory
+     * @throws SearchEngineException
+     *             declared by the {@link SearchEngine} contract
+     */
+    @Override
+    public final void search(SearchContext searchContext) throws SearchEngineException {
+        SearchContextFactory f = searchContext.getFactory();
+        for (QueryKeyword qkw : searchContext.getQueryKeyWords()) {
+            // The first keyword always has the highest score
+
+            LOGGER.debug("Getting related words for {}, {}", qkw.getKeyword(), qkw.getScore());
+            List<Scored> keywords = wordnetClient.getScoredWordnetResources(normalize(qkw.getKeyword()),
+                qkw.getScore());
+            for (Scored wordnetFinding : keywords) {
+                LOGGER.debug("\t {}:{}", wordnetFinding.getKeyword(), wordnetFinding.getScore());
+                f.createKeyword(wordnetFinding.getKeyword(), wordnetFinding.getScore(), qkw,
+                    RelatedKeywordSource.WORDNET.getName());
+            }
+        }
+    }
+
+    /**
+     * Strips every character that is not an ASCII letter or digit, whitespace included.
+     *
+     * @param keyword
+     *            the raw keyword
+     * @return the keyword reduced to the characters {@code a-z}, {@code A-Z} and {@code 0-9}
+     */
+    private String normalize(String keyword) {
+        return keyword.replaceAll("[^a-zA-Z0-9]", "");
+    }
+
+    /**
+     * @return the map holding this engine's properties; the returned map is the internal instance,
+     *         not a copy
+     */
+    @Override
+    public final Map<String,Object> getEngineProperties() {
+        return this.engineProperties;
+    }
+}
Added: incubator/stanbol/trunk/contenthub/search/engines/wordnet/target/classes/org/apache/stanbol/contenthub/search/engines/wordnet/Scored.class
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/search/engines/wordnet/target/classes/org/apache/stanbol/contenthub/search/engines/wordnet/Scored.class?rev=1211477&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/stanbol/trunk/contenthub/search/engines/wordnet/target/classes/org/apache/stanbol/contenthub/search/engines/wordnet/Scored.class
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/stanbol/trunk/contenthub/search/engines/wordnet/target/classes/org/apache/stanbol/contenthub/search/engines/wordnet/WordnetClient.class
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/search/engines/wordnet/target/classes/org/apache/stanbol/contenthub/search/engines/wordnet/WordnetClient.class?rev=1211477&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/stanbol/trunk/contenthub/search/engines/wordnet/target/classes/org/apache/stanbol/contenthub/search/engines/wordnet/WordnetClient.class
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/stanbol/trunk/contenthub/search/engines/wordnet/target/classes/org/apache/stanbol/contenthub/search/engines/wordnet/WordnetEngine.class
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/search/engines/wordnet/target/classes/org/apache/stanbol/contenthub/search/engines/wordnet/WordnetEngine.class?rev=1211477&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/stanbol/trunk/contenthub/search/engines/wordnet/target/classes/org/apache/stanbol/contenthub/search/engines/wordnet/WordnetEngine.class
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/stanbol/trunk/contenthub/search/engines/wordnet/target/maven-shared-archive-resources/META-INF/DEPENDENCIES
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/search/engines/wordnet/target/maven-shared-archive-resources/META-INF/DEPENDENCIES?rev=1211477&view=auto
==============================================================================
--- incubator/stanbol/trunk/contenthub/search/engines/wordnet/target/maven-shared-archive-resources/META-INF/DEPENDENCIES (added)
+++ incubator/stanbol/trunk/contenthub/search/engines/wordnet/target/maven-shared-archive-resources/META-INF/DEPENDENCIES Wed Dec 7 15:41:42 2011
@@ -0,0 +1,27 @@
+// ------------------------------------------------------------------
+// Transitive dependencies of this project determined from the
+// maven pom organized by organization.
+// ------------------------------------------------------------------
+
+Apache Stanbol Search Wordnet Engine
+
+
+From: 'an unknown organization'
+ - jaws edu.smu.tspell:jaws:jar:1.2
+
+ - org.osgi.compendium org.osgi:org.osgi.compendium:jar:4.1.0
+
+ - StAX API (http://stax.codehaus.org/) stax:stax-api:jar:1.0.1
+ License: The Apache Software License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0.txt)
+
+From: 'QOS.ch' (http://www.qos.ch)
+ - SLF4J API Module (http://www.slf4j.org) org.slf4j:slf4j-api:jar:1.6.1
+ License: MIT License (http://www.opensource.org/licenses/mit-license.php)
+
+From: 'The Apache Software Foundation' (http://www.apache.org/)
+ - Annotations for SCR (http://felix.apache.org/org.apache.felix.scr.annotations/) org.apache.felix:org.apache.felix.scr.annotations:jar:1.4.0
+ License: The Apache Software License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0.txt)
+
+
+
+