You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2014/05/27 14:28:00 UTC
svn commit: r1597760 - in
/stanbol/branches/release-0.12/enhancement-engines/geonames: pom.xml
src/main/java/org/apache/stanbol/enhancer/engines/geonames/impl/LocationEnhancementEngine.java
Author: rwesten
Date: Tue May 27 12:28:00 2014
New Revision: 1597760
URL: http://svn.apache.org/r1597760
Log:
A simple fix for STANBOL-1303 that normalizes scores based on highest score
Modified:
stanbol/branches/release-0.12/enhancement-engines/geonames/pom.xml
stanbol/branches/release-0.12/enhancement-engines/geonames/src/main/java/org/apache/stanbol/enhancer/engines/geonames/impl/LocationEnhancementEngine.java
Modified: stanbol/branches/release-0.12/enhancement-engines/geonames/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/geonames/pom.xml?rev=1597760&r1=1597759&r2=1597760&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/geonames/pom.xml (original)
+++ stanbol/branches/release-0.12/enhancement-engines/geonames/pom.xml Tue May 27 12:28:00 2014
@@ -76,8 +76,8 @@
<configuration>
<excludes>
<!-- Reactivated this test. If it fails because of geonames.org
- we might need to deactivate it again. -->
- <exclude>**/TestLocationEnhancementEngine*.java</exclude>
+ we might need to deactivate it again.
+ <exclude>**/TestLocationEnhancementEngine*.java</exclude> -->
</excludes>
</configuration>
</plugin>
Modified: stanbol/branches/release-0.12/enhancement-engines/geonames/src/main/java/org/apache/stanbol/enhancer/engines/geonames/impl/LocationEnhancementEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/geonames/src/main/java/org/apache/stanbol/enhancer/engines/geonames/impl/LocationEnhancementEngine.java?rev=1597760&r1=1597759&r2=1597760&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/geonames/src/main/java/org/apache/stanbol/enhancer/engines/geonames/impl/LocationEnhancementEngine.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/geonames/src/main/java/org/apache/stanbol/enhancer/engines/geonames/impl/LocationEnhancementEngine.java Tue May 27 12:28:00 2014
@@ -372,9 +372,10 @@ public class LocationEnhancementEngine
throw new EngineException(this, ci, e);
}
if (results != null) {
+ Double maxScore = results.isEmpty() ? null : results.get(0).getScore();
for (Toponym result : results) {
log.debug("process result {} {}",result.getGeoNameId(),result.getName());
- Double score = getToponymScore(result);
+ Double score = getToponymScore(result,maxScore);
log.debug(" > score {}",score);
if (score != null) {
if (score < minScore) {
@@ -391,7 +392,7 @@ public class LocationEnhancementEngine
}
//write the enhancement!
NonLiteral locationEnhancement = writeEntityEnhancement(
- contentItemId, graph, literalFactory, result, entry.getValue(), null, null);
+ contentItemId, graph, literalFactory, result, entry.getValue(), null, score);
log.debug(" > {} >= {}",score,minHierarchyScore);
if (score != null && score >= minHierarchyScore) {
log.debug(" > getHierarchy for {} {}",result.getGeoNameId(),result.getName());
@@ -417,7 +418,7 @@ public class LocationEnhancementEngine
* Currently it is set to the value of the suggested entry
*/
writeEntityEnhancement(contentItemId, graph, literalFactory, hierarchyEntry,
- null, Collections.singletonList(locationEnhancement), score);
+ null, Collections.singletonList(locationEnhancement), 1.0);
}
}
} catch (Exception e) {
@@ -431,18 +432,18 @@ public class LocationEnhancementEngine
/**
* Getter for the score in a range from [0..1]<p>
- * NOTE (2010.11.16, rw): GeoNames previously returned the score in the
- * range from [0..1]. It looks like that up from now they use the
- * range [0..100]. Therefore I created this method to make the necessary
- * adaptation.
- * see also http://code.google.com/p/iks-project/issues/detail?id=89
+ * NOTE (2014.05.27, rw): as described by STANBOL-1303 the scores returned
+ * by Geonames changed. So this method was adapted to calculate scores
+ * relative to the highest returned one.
*
* @param toponym the toponym
+ * @param maxScore the highest score or <code>null</code> if no highest score
+ * is yet known (assuming that the passed toponym is the highest score)
*
- * @return the score in a range [0..1]
+ * @return the score in a range [0..1] (relative to the highest score)
*/
- private Double getToponymScore(Toponym toponym) {
- return toponym.getScore() == null ? null : toponym.getScore() / 100;
+ private Double getToponymScore(Toponym toponym, Double maxScore) {
+ return toponym.getScore() == null ? null : maxScore == null ? 1 : Math.log1p(toponym.getScore())/Math.log1p(maxScore);
}
/**
@@ -479,7 +480,7 @@ public class LocationEnhancementEngine
private UriRef writeEntityEnhancement(UriRef contentItemId, MGraph graph,
LiteralFactory literalFactory, Toponym toponym,
Collection<NonLiteral> relatedEnhancements, Collection<NonLiteral> requiresEnhancements,
- Double defaultScore) {
+ Double score) {
UriRef entityRef = new UriRef("http://sws.geonames.org/" + toponym.getGeoNameId() + '/');
FeatureClass featureClass = toponym.getFeatureClass();
log.debug(" > featureClass " + featureClass);
@@ -500,10 +501,6 @@ public class LocationEnhancementEngine
graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, entityRef));
log.debug(" > name " + toponym.getName());
graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(toponym.getName())));
- Double score = getToponymScore(toponym);
- if (score == null) { //use the default score as fallback
- score = defaultScore;
- }
if (score != null) {
graph.add(new TripleImpl(entityAnnotation, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(score)));
}
@@ -629,5 +626,4 @@ public class LocationEnhancementEngine
return minHierarchyScore;
}
-
}