You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2016/10/17 22:59:24 UTC

opennlp-sandbox git commit: OPENNLP-864 Rename name finder annotator classes

Repository: opennlp-sandbox
Updated Branches:
  refs/heads/864 [created] 552afeab1


OPENNLP-864 Rename name finder annotator classes


Project: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/commit/552afeab
Tree: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/tree/552afeab
Diff: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/diff/552afeab

Branch: refs/heads/864
Commit: 552afeab109ba9a119da6665cb5568250f5e46ee
Parents: bf255a3
Author: kottmann <jo...@apache.org>
Authored: Tue Oct 18 00:58:53 2016 +0200
Committer: kottmann <jo...@apache.org>
Committed: Tue Oct 18 00:58:53 2016 +0200

----------------------------------------------------------------------
 .../opennlp/bratann/NameFinderAnnService.java   | 112 ++++++++++++++
 .../opennlp/bratann/NameFinderResource.java     | 148 +++++++++++++++++++
 .../opennlp/bratannotator/BratAnnService.java   | 112 --------------
 .../bratannotator/BratNameFinderResource.java   | 148 -------------------
 4 files changed, 260 insertions(+), 260 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/552afeab/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
----------------------------------------------------------------------
diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
new file mode 100644
index 0000000..926730f
--- /dev/null
+++ b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.bratann;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.List;
+
+import org.eclipse.jetty.server.Server;
+import org.eclipse.jetty.servlet.ServletContextHandler;
+import org.eclipse.jetty.servlet.ServletHolder;
+
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.TokenNameFinder;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.sentdetect.NewlineSentenceDetector;
+import opennlp.tools.sentdetect.SentenceDetector;
+import opennlp.tools.sentdetect.SentenceDetectorME;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.tokenize.SimpleTokenizer;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.tokenize.WhitespaceTokenizer;
+
+public class NameFinderAnnService {
+  // Shared with NameFinderResource, which reads these fields; main() replaces the defaults.
+  public static SentenceDetector sentenceDetector = new NewlineSentenceDetector();
+  public static Tokenizer tokenizer = WhitespaceTokenizer.INSTANCE;
+  public static TokenNameFinder nameFinders[];
+  /** Configures the shared components from the arguments and starts Jetty on port 8080. */
+  public static void main(String[] args) throws Exception {
+
+    if (args.length == 0) {
+      System.out.println(
+          "[-tokenizerModel file] [-ruleBasedTokenizer whitespace|simple] [-sentenceDetectorModel file] "
+              + "namefinderFile|nameFinderURI");
+      return;
+    }
+
+    List<String> argList = Arrays.asList(args);
+    // indexOf yields -1 for an absent option, so an index of 0 means the option was not given
+    int sentenceModelIndex = argList.indexOf("-sentenceDetectorModel")
+        + 1;
+    if (sentenceModelIndex > 0 && sentenceModelIndex < args.length) {
+      sentenceDetector = new SentenceDetectorME(
+          new SentenceModel(new File(args[sentenceModelIndex])));
+    }
+
+    int ruleBasedTokenizerIndex = argList.indexOf("-ruleBasedTokenizer") + 1;
+
+    if (ruleBasedTokenizerIndex > 0 && ruleBasedTokenizerIndex < args.length) {
+      if ("whitespace".equals(args[ruleBasedTokenizerIndex])) {
+        tokenizer = WhitespaceTokenizer.INSTANCE;
+      } else if ("simple".equals(args[ruleBasedTokenizerIndex])) {
+        tokenizer = SimpleTokenizer.INSTANCE;
+      } else {
+        System.out
+            .println("unknown tokenizer: " + args[ruleBasedTokenizerIndex]);
+        return;
+      }
+    }
+    // evaluated after -ruleBasedTokenizer, so a tokenizer model wins when both are given
+    int tokenizerModelIndex = argList.indexOf("-tokenizerModel") + 1;
+    if (tokenizerModelIndex > 0 && tokenizerModelIndex < args.length) {
+      tokenizer = new TokenizerME(
+          new TokenizerModel(new File(args[tokenizerModelIndex])));
+    }
+    // the last argument is always read as the name finder model file
+    nameFinders = new TokenNameFinder[] { new NameFinderME(
+        new TokenNameFinderModel(new File(args[args.length - 1]))) };
+
+    ServletContextHandler context = new ServletContextHandler(
+        ServletContextHandler.SESSIONS);
+    context.setContextPath("/");
+
+    Server jettyServer = new Server(8080);
+    jettyServer.setHandler(context);
+
+    ServletHolder jerseyServlet = context
+        .addServlet(com.sun.jersey.spi.container.servlet.ServletContainer.class, "/*");
+    jerseyServlet.setInitParameter("com.sun.jersey.config.property.packages",
+        "opennlp.bratann");
+    jerseyServlet.setInitParameter("com.sun.jersey.api.json.POJOMappingFeature", "true");
+    jerseyServlet.setInitOrder(0);
+
+    jerseyServlet.setInitParameter("jersey.config.server.provider.classnames",
+        NameFinderResource.class.getCanonicalName());
+
+    try {
+      jettyServer.start();
+      jettyServer.join();
+    } finally {
+      jettyServer.destroy();
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/552afeab/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java
----------------------------------------------------------------------
diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java
new file mode 100644
index 0000000..39cec0e
--- /dev/null
+++ b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.bratann;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import javax.ws.rs.Consumes;
+import javax.ws.rs.POST;
+import javax.ws.rs.Path;
+import javax.ws.rs.Produces;
+import javax.ws.rs.QueryParam;
+import javax.ws.rs.core.MediaType;
+
+import opennlp.tools.namefind.TokenNameFinder;
+import opennlp.tools.sentdetect.SentenceDetector;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.util.Span;
+
+@Path("/ner")
+public class NameFinderResource {
+  /** A recognized name: one offset pair and text per line fragment, plus the name type. */
+  public static class NameAnn {
+    public int[][] offsets;
+    public String[] texts;
+    public String type;
+  }
+
+  private SentenceDetector sentDetect = NameFinderAnnService.sentenceDetector;
+  private Tokenizer tokenizer = NameFinderAnnService.tokenizer;
+  private TokenNameFinder nameFinders[] = NameFinderAnnService.nameFinders;
+
+  // Returns the first index in [beginOffset, endOffset) that is not white space, or -1.
+  // Line breaks and tabs are control characters, which isSpaceChar alone does not match.
+  private static int findNextNonWhitespaceChar(CharSequence s, int beginOffset,
+      int endOffset) {
+    for (int i = beginOffset; i < endOffset; i++) {
+      if (!Character.isWhitespace(s.charAt(i)) && !Character.isSpaceChar(s.charAt(i))) {
+        return i;
+      }
+    }
+    return -1;
+  }
+
+  @POST
+  @Consumes(MediaType.TEXT_PLAIN)
+  @Produces(MediaType.APPLICATION_JSON)
+  public Map<String, NameAnn> findNames(@QueryParam("model") String modelName,
+      String text) {
+    // returns one NameAnn per detected name, keyed by a running index; modelName is not used
+    Span sentenceSpans[] = sentDetect.sentPosDetect(text);
+
+    Map<String, NameAnn> map = new HashMap<String, NameAnn>();
+
+    int indexCounter = 0;
+
+    for (int i = 0; i < sentenceSpans.length; i++) {
+      String sentenceText = sentenceSpans[i].getCoveredText(text).toString();
+      // token spans are relative to sentenceText; the sentence start is added back below
+      Span tokenSpans[] = tokenizer
+          .tokenizePos(sentenceText);
+
+      String tokens[] = Span.spansToStrings(tokenSpans, sentenceText);
+
+      for (TokenNameFinder nameFinder : nameFinders) {
+        Span names[] = nameFinder.find(tokens);
+
+        for (Span name : names) {
+          int beginOffset = tokenSpans[name.getStart()].getStart()
+              + sentenceSpans[i].getStart();
+          int endOffset = tokenSpans[name.getEnd() - 1].getEnd()
+              + sentenceSpans[i].getStart();
+
+          // collect the index of the first line break of every run of line breaks
+          List<Integer> newLineIndexes = new ArrayList<Integer>();
+
+          // the remaining characters of a run are skipped as white space below, when
+          // the begin of the next segment is searched
+
+          boolean inNewLineSequence = false;
+          for (int ci = beginOffset; ci < endOffset; ci++) {
+            if (text.charAt(ci) == '\n' || text.charAt(ci) == '\r') {
+              if (!inNewLineSequence) {
+                newLineIndexes.add(ci);
+              }
+              inNewLineSequence = true;
+            } else {
+              inNewLineSequence = false;
+            }
+          }
+
+          List<String> textSegments = new ArrayList<String>();
+          List<int[]> spanSegments = new ArrayList<int[]>();
+
+          int segmentBegin = beginOffset;
+
+          for (int newLineOffset : newLineIndexes) {
+            if (newLineOffset < segmentBegin) {
+              continue; // this line break was already skipped as white space
+            }
+            // create segment from begin to offset
+            textSegments.add(text.substring(segmentBegin, newLineOffset));
+            spanSegments.add(new int[] { segmentBegin, newLineOffset });
+
+            segmentBegin = findNextNonWhitespaceChar(text, newLineOffset + 1,
+                endOffset);
+
+            if (segmentBegin == -1) {
+              break;
+            }
+          }
+
+          // create left over segment
+          if (segmentBegin != -1) {
+            textSegments.add(text.substring(segmentBegin, endOffset));
+            spanSegments.add(new int[] { segmentBegin, endOffset });
+          }
+
+          NameAnn ann = new NameAnn();
+          ann.texts = textSegments.toArray(new String[textSegments.size()]);
+          ann.offsets = spanSegments.toArray(new int[spanSegments.size()][]);
+          ann.type = name.getType();
+
+          map.put(Integer.toString(indexCounter++), ann);
+        }
+      }
+    }
+
+    return map;
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/552afeab/opennlp-brat-annotator/src/main/java/opennlp/bratannotator/BratAnnService.java
----------------------------------------------------------------------
diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratannotator/BratAnnService.java b/opennlp-brat-annotator/src/main/java/opennlp/bratannotator/BratAnnService.java
deleted file mode 100644
index 6bfbb5c..0000000
--- a/opennlp-brat-annotator/src/main/java/opennlp/bratannotator/BratAnnService.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.bratannotator;
-
-import java.io.File;
-import java.net.URI;
-import java.net.URL;
-
-import org.eclipse.jetty.server.Server;
-import org.eclipse.jetty.servlet.ServletContextHandler;
-import org.eclipse.jetty.servlet.ServletHolder;
-
-import opennlp.tools.namefind.NameFinderME;
-import opennlp.tools.namefind.TokenNameFinder;
-import opennlp.tools.namefind.TokenNameFinderModel;
-import opennlp.tools.sentdetect.NewlineSentenceDetector;
-import opennlp.tools.sentdetect.SentenceDetector;
-import opennlp.tools.sentdetect.SentenceDetectorME;
-import opennlp.tools.sentdetect.SentenceModel;
-import opennlp.tools.tokenize.SimpleTokenizer;
-import opennlp.tools.tokenize.Tokenizer;
-import opennlp.tools.tokenize.TokenizerME;
-import opennlp.tools.tokenize.TokenizerModel;
-import opennlp.tools.tokenize.WhitespaceTokenizer;
-
-public class BratAnnService {
-  
-  public static SentenceDetector sentenceDetector;
-  public static Tokenizer tokenizer;
-  public static TokenNameFinder nameFinders[];
-  
-  public static void main(String[] args) throws Exception {
-    
-    if (args.length < 3) {
-      System.out.println("sentenceDetectorURI tokenizerURI namefinderURI_1 ... nameFinderURI_n");
-      return;
-    }
-
-    URI sentenceDetectorUri = URI.create(args[0]);
-    if ("sentenceDetector".equals(sentenceDetectorUri.getScheme())) {
-      
-      if ("newline".equals(sentenceDetectorUri.getSchemeSpecificPart())) {
-        sentenceDetector = new NewlineSentenceDetector();
-      }
-      else {
-        System.out.println("unkown sentence detector");
-        return;
-      }
-    }
-    else {
-      sentenceDetector = new SentenceDetectorME(new SentenceModel(new File(args[0])));
-    }
-    
-    URI tokenizerUri = URI.create(args[1]);
-    if ("tokenizer".equals(tokenizerUri.getScheme())) {
-      if ("whitespace".equals(tokenizerUri.getSchemeSpecificPart())) {
-        tokenizer = WhitespaceTokenizer.INSTANCE;
-      }
-      else if ("simple".equals(tokenizerUri.getSchemeSpecificPart())) {
-        tokenizer = SimpleTokenizer.INSTANCE;
-      } 
-      else {
-        System.out.println("unkown tokenizer");
-        return;
-      }
-
-    }
-    else {
-      tokenizer = new TokenizerME(new TokenizerModel(new File(args[1])));
-    }
-    
-    nameFinders = new TokenNameFinder[] {new NameFinderME(new TokenNameFinderModel(new URL(args[2])))};
-    
-    ServletContextHandler context = new ServletContextHandler(
-        ServletContextHandler.SESSIONS);
-    context.setContextPath("/");
-
-    Server jettyServer = new Server(8080);
-    jettyServer.setHandler(context);
-
-    ServletHolder jerseyServlet = context
-        .addServlet(com.sun.jersey.spi.container.servlet.ServletContainer.class, "/*");
-    jerseyServlet.setInitParameter("com.sun.jersey.config.property.packages", "opennlp.bratannotator");
-    jerseyServlet.setInitParameter("com.sun.jersey.api.json.POJOMappingFeature", "true");
-    jerseyServlet.setInitOrder(0);
-
-    jerseyServlet.setInitParameter("jersey.config.server.provider.classnames",
-        BratNameFinderResource.class.getCanonicalName());
-
-    try {
-      jettyServer.start();
-      jettyServer.join();
-    } finally {
-      jettyServer.destroy();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/552afeab/opennlp-brat-annotator/src/main/java/opennlp/bratannotator/BratNameFinderResource.java
----------------------------------------------------------------------
diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratannotator/BratNameFinderResource.java b/opennlp-brat-annotator/src/main/java/opennlp/bratannotator/BratNameFinderResource.java
deleted file mode 100644
index 88dacc6..0000000
--- a/opennlp-brat-annotator/src/main/java/opennlp/bratannotator/BratNameFinderResource.java
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.bratannotator;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import javax.ws.rs.Consumes;
-import javax.ws.rs.POST;
-import javax.ws.rs.Path;
-import javax.ws.rs.Produces;
-import javax.ws.rs.QueryParam;
-import javax.ws.rs.core.MediaType;
-
-import opennlp.tools.namefind.TokenNameFinder;
-import opennlp.tools.sentdetect.SentenceDetector;
-import opennlp.tools.tokenize.Tokenizer;
-import opennlp.tools.util.Span;
-
-@Path("/ner")
-public class BratNameFinderResource {
-
-  public static class NameAnn {
-    public int[][] offsets;
-    public String[] texts;
-    public String type;
-  }
-
-  private SentenceDetector sentDetect = BratAnnService.sentenceDetector;
-  private Tokenizer tokenizer = BratAnnService.tokenizer;
-  private TokenNameFinder nameFinders[] = BratAnnService.nameFinders;
-
-  private static int findNextNonWhitespaceChar(CharSequence s, int beginOffset,
-      int endOffset) {
-
-    for (int i = beginOffset; i < endOffset; i++) {
-      if (!Character.isSpaceChar(s.charAt(i))) {
-        return i;
-      }
-    }
-
-    return -1;
-  }
-
-  @POST
-  @Consumes(MediaType.TEXT_PLAIN)
-  @Produces(MediaType.APPLICATION_JSON)
-  public Map<String, NameAnn> findNames(@QueryParam("model") String modelName,
-      String text) {
-
-    Span sentenceSpans[] = sentDetect.sentPosDetect(text);
-
-    Map<String, NameAnn> map = new HashMap<String, NameAnn>();
-
-    int indexCounter = 0;
-
-    for (int i = 0; i < sentenceSpans.length; i++) {
-      
-      String sentenceText = sentenceSpans[i].getCoveredText(text).toString();
-      
-      // offset of sentence gets lost here!
-      Span tokenSpans[] = tokenizer
-          .tokenizePos(sentenceText);
-
-      String tokens[] = Span.spansToStrings(tokenSpans, sentenceText);
-
-      for (TokenNameFinder nameFinder : nameFinders) {
-        Span names[] = nameFinder.find(tokens);
-
-        for (Span name : names) {
-          
-          int beginOffset = tokenSpans[name.getStart()].getStart()
-              + sentenceSpans[i].getStart();
-          int endOffset = tokenSpans[name.getEnd() - 1].getEnd()
-              + sentenceSpans[i].getStart();
-
-          // create a list of new line indexes
-          List<Integer> newLineIndexes = new ArrayList<Integer>();
-
-          // TODO: Code needs to handle case that there are multiple new lines
-          // in a row
-
-          boolean inNewLineSequence = false;
-          for (int ci = beginOffset; ci < endOffset; ci++) {
-            if (text.charAt(ci) == '\n' || text.charAt(ci) == '\r') {
-              if (!inNewLineSequence) {
-                newLineIndexes.add(ci);
-              }
-              inNewLineSequence = true;
-            } else {
-              inNewLineSequence = false;
-            }
-          }
-
-          List<String> textSegments = new ArrayList<String>();
-          List<int[]> spanSegments = new ArrayList<int[]>();
-
-          int segmentBegin = beginOffset;
-
-          for (int newLineOffset : newLineIndexes) {
-            // create segment from begin to offset
-            textSegments.add(text.substring(segmentBegin, newLineOffset));
-            spanSegments.add(new int[] { segmentBegin, newLineOffset });
-
-            segmentBegin = findNextNonWhitespaceChar(text, newLineOffset + 1,
-                endOffset);
-
-            if (segmentBegin == -1) {
-              break;
-            }
-          }
-
-          // create left over segment
-          if (segmentBegin != -1) {
-            textSegments.add(text.substring(segmentBegin, endOffset));
-            spanSegments.add(new int[] { segmentBegin, endOffset });
-          }
-
-          NameAnn ann = new NameAnn();
-          ann.texts = textSegments.toArray(new String[textSegments.size()]);
-          ann.offsets = spanSegments.toArray(new int[spanSegments.size()][]);
-          ann.type = name.getType();
-
-          map.put(Integer.toString(indexCounter++), ann);
-        }
-      }
-    }
-
-    return map;
-  }
-}