You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/02/27 12:31:08 UTC

svn commit: r1572524 - in /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker: ./ EntityLinkerTool.java

Author: joern
Date: Thu Feb 27 11:31:08 2014
New Revision: 1572524

URL: http://svn.apache.org/r1572524
Log:
OPENNLP-630 First draft of the entity linker command line tool

Added:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java

Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java?rev=1572524&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java Thu Feb 27 11:31:08 2014
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.entitylinker;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.cmdline.BasicCmdLineTool;
+import opennlp.tools.cmdline.CLI;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.PerformanceMonitor;
+import opennlp.tools.cmdline.SystemInputStreamFactory;
+import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.entitylinker.EntityLinker;
+import opennlp.tools.entitylinker.EntityLinkerFactory;
+import opennlp.tools.entitylinker.EntityLinkerProperties;
+import opennlp.tools.namefind.NameSample;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.Span;
+
+public class EntityLinkerTool extends BasicCmdLineTool {
+
+  @Override
+  public String getShortDescription() {
+    return "links an entity to an external data set";
+  }
+  
+  @Override
+  public void run(String[] args) {
+    
+    if (0 == args.length) {
+      System.out.println(getHelp());
+    }
+    else {
+      // TODO: Ask Mark if we can remove the type, the user knows upfront if he tries
+      // to link place names or company mentions ...
+      String entityType = "location";
+      
+      // Load the properties, they should contain everything that is necessary to instantiate
+      // the component
+      
+      // TODO: Entity Linker Properties constructor should not duplicate code
+      EntityLinkerProperties properties;
+      try {
+        properties = new EntityLinkerProperties(new File(args[0]));
+      }
+      catch (IOException e) {
+        throw new TerminateToolException(-1, "Failed to load the properties file!");
+      }
+      
+      // TODO: It should not just throw Exception.
+      
+      EntityLinker entityLinker;
+      try {
+        entityLinker = EntityLinkerFactory.getLinker(entityType, properties);
+      }
+      catch (Exception e) {
+        throw new TerminateToolException(-1, "Failed to instantiate the Entity Linker: " + e.getMessage());
+      }
+      
+      PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
+      perfMon.start();
+      
+      try {
+        
+        ObjectStream<String> untokenizedLineStream = new PlainTextByLineStream(
+            new SystemInputStreamFactory(), SystemInputStreamFactory.encoding());
+        
+        List<NameSample> document = new ArrayList<NameSample>();
+        
+        String line;
+        while ((line = untokenizedLineStream.read()) != null) {
+
+          if (line.trim().isEmpty()) {
+            // Run entity linker ... and output result ...
+            
+            StringBuilder text = new StringBuilder();
+            Span sentences[] = new Span[document.size()];
+            List<Span> tokens = new ArrayList<Span>();
+            List<Span> names = new ArrayList<Span>();
+            
+            for (int i = 0; i < document.size(); i++) {
+              
+              NameSample sample = document.get(i);
+              
+              int sentenceBegin = text.length();
+              
+              int tokenSentOffset = tokens.size();
+                  
+              // for all tokens
+              for (String token : sample.getSentence()) {
+                int tokenBegin = text.length();
+                text.append(token);
+                Span tokenSpan = new Span(tokenBegin, text.length());
+                text.append(" ");
+              }
+              
+              for (Span name : sample.getNames()) {
+                names.add(new Span(tokenSentOffset + name.getStart(), tokenSentOffset + name.getEnd(), name.getType()));
+              }
+              
+              sentences[i] = new Span(sentenceBegin, text.length());
+              text.append("\n");
+            }
+            
+            List<Span> linkedSpans = entityLinker.find(text.toString(), sentences, tokens.toArray(new Span[tokens.size()]),
+                names.toArray(new Span[names.size()]));
+            
+            for (int i = 0; i < linkedSpans.size(); i++) {
+              System.out.println(linkedSpans.get(i));
+            }
+            
+            perfMon.incrementCounter(document.size());
+            document.clear();
+          }
+          else {
+            document.add(NameSample.parse(line, false));
+          }
+        }
+      }
+      catch (IOException e) {
+        CmdLineUtil.handleStdinIoError(e);
+      }
+      
+      perfMon.stopAndPrintFinalResult();
+    }
+  }
+
+  @Override
+  public String getHelp() {
+    return "Usage: " + CLI.CMD + " " + getName() + " model < sentences";
+  }
+}