You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2014/02/25 21:54:26 UTC

svn commit: r1571820 [4/4] - in /ctakes/sandbox/ctakes-dictionary-lookup2: ./ desc/ desc/analysis_engine/ doc/ example/ example/desc/ example/desc/analysis_engine/ example/desc/analysis_engine/ctakes-dictionary-lookup2/ src/ src/main/ src/main/java/ sr...

Added: ctakes/sandbox/ctakes-dictionary-lookup2/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TokenMatchUtil.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-dictionary-lookup2/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TokenMatchUtil.java?rev=1571820&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-dictionary-lookup2/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TokenMatchUtil.java (added)
+++ ctakes/sandbox/ctakes-dictionary-lookup2/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TokenMatchUtil.java Tue Feb 25 20:54:25 2014
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.dictionary.lookup2.util;
+
+import org.apache.ctakes.dictionary.lookup2.term.RareWordTerm;
+
+import java.util.List;
+
+/**
+ * Utility class with methods for matching tokens to valid terms
+ *
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 2/25/14
+ */
+final public class TokenMatchUtil {
+
+   private TokenMatchUtil() {}
+
+
+   /**
+    * Hopefully the jit will inline this method
+    * @param rareWordHit rare word term to check for match
+    * @param allTokens all tokens in a window
+    * @param termStartIndex index of first token in allTokens to check
+    * @param termEndIndex index of last token in allTokens to check
+    * @return true if the rare word term exists in allTokens within the given indices
+    */
+   public static boolean isTermMatch( final RareWordTerm rareWordHit, final List<FastLookupToken> allTokens,
+                                      final int termStartIndex, final int termEndIndex ) {
+      final char[] rareWordHitChars = rareWordHit.getText().toCharArray();
+      int hitCharIndex = 0;
+      for ( int i=termStartIndex; i<termEndIndex+1; i++ ) {
+         final char[] tokenChars = allTokens.get( i ).getText().toCharArray();
+         if ( isTokenMatch( rareWordHitChars, hitCharIndex, tokenChars ) ) {
+            // the normal token matched, move to the next token
+            hitCharIndex += tokenChars.length+1;
+            continue;
+         }
+         if ( allTokens.get( i ).getVariant() == null ) {
+            // the token normal didn't match and there is no variant
+            return false;
+         }
+         final char[] variantChars = allTokens.get( i ).getVariant().toCharArray();
+         if ( isTokenMatch( rareWordHitChars, hitCharIndex, variantChars ) ) {
+            // the variant matched, move to the next token
+            hitCharIndex += variantChars.length+1;
+            continue;
+         }
+         // the normal token didn't match and the variant didn't match
+         return false;
+      }
+      // some combination of token and variant matched
+      return true;
+   }
+
+   /**
+    * Check the rare word term to see if a given token is at a given index within that term
+    * Hopefully the jit will inline this method
+    * @param rareWordHitChars character array of all characters for the entire possible term (all words)
+    * @param hitCharIndex     character index in rare word term to check for token
+    * @param tokenChars       character array of the search token
+    * @return                 true if rareWordHitChars contains tokenChars at location hitCharIndex
+    */
+   static private boolean isTokenMatch( final char[] rareWordHitChars, final int hitCharIndex,
+                                          final char[] tokenChars ) {
+      if ( hitCharIndex + tokenChars.length > rareWordHitChars.length ) {
+         return false;
+      }
+      for ( int tokenCharIndex = 0; tokenCharIndex<tokenChars.length; tokenCharIndex++ ) {
+         if ( tokenChars[tokenCharIndex] != rareWordHitChars[hitCharIndex+tokenCharIndex] ) {
+            return false;
+         }
+      }
+      return true;
+   }
+
+
+}

Propchange: ctakes/sandbox/ctakes-dictionary-lookup2/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TokenMatchUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: ctakes/sandbox/ctakes-dictionary-lookup2/src/main/java/org/apache/ctakes/dictionary/lookup2/util/UmlsUserApprover.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-dictionary-lookup2/src/main/java/org/apache/ctakes/dictionary/lookup2/util/UmlsUserApprover.java?rev=1571820&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-dictionary-lookup2/src/main/java/org/apache/ctakes/dictionary/lookup2/util/UmlsUserApprover.java (added)
+++ ctakes/sandbox/ctakes-dictionary-lookup2/src/main/java/org/apache/ctakes/dictionary/lookup2/util/UmlsUserApprover.java Tue Feb 25 20:54:25 2014
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.dictionary.lookup2.util;
+
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.net.URLEncoder;
+
+import org.apache.ctakes.utils.env.EnvironmentVariable;
+
+
+/**
+ * Used to validate UMLS license / user.
+ *
+ * TODO  Authentication before download would be nice, or perhaps an encrypted download
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 2/19/14
+ */
+final public class UmlsUserApprover {
+
+
+   private final static String UMLSADDR_PARAM = "ctakes.umlsaddr";
+   private final static String UMLSVENDOR_PARAM = "ctakes.umlsvendor";
+   private final static String UMLSUSER_PARAM = "ctakes.umlsuser";
+   private final static String UMLSPW_PARAM = "ctakes.umlspw";
+
+   static final private Logger LOGGER = Logger.getLogger( "UmlsUserApprover" );
+
+   private UmlsUserApprover() {}
+
+   /**
+    * Silently validate the UMLS license / user
+    * @param aContext contains information about the UMLS license / user
+    * @throws ResourceInitializationException if the validation does not pass
+    */
+   static public void validateUMLSUser( final UimaContext aContext ) throws ResourceInitializationException {
+      final String umlsAddress = EnvironmentVariable.getEnv( UMLSADDR_PARAM, aContext );
+      final String umlsVendor = EnvironmentVariable.getEnv( UMLSVENDOR_PARAM, aContext );
+      final String umlsUser = EnvironmentVariable.getEnv( UMLSUSER_PARAM, aContext );
+      final String umlsPassword = EnvironmentVariable.getEnv( UMLSPW_PARAM, aContext );
+      LOGGER.info( "Using " + UMLSADDR_PARAM + ": " + umlsAddress + ": " + umlsUser );
+      if ( !isValidUMLSUser( umlsAddress, umlsVendor, umlsUser, umlsPassword ) ) {
+         LOGGER.error( "Error: Invalid UMLS License.  " +
+                        "A UMLS License is required to use the UMLS dictionary lookup. \n" +
+                        "Error: You may request one at: https://uts.nlm.nih.gov/license.html \n" +
+                        "Please verify your UMLS license settings in the " +
+                        "DictionaryLookupAnnotatorUMLS.xml configuration." );
+         throw new ResourceInitializationException( new Exception( "Failed to initilize.  Invalid UMLS License" ) );
+      }
+   }
+
+   /**
+    * @param umlsaddr -
+    * @param vendor   -
+    * @param username -
+    * @param password -
+    * @return true if the server at umlsaddr approves of the vendor, user, password combination
+    */
+   public static boolean isValidUMLSUser( final String umlsaddr, final String vendor,
+                                          final String username, final String password ) {
+      String data;
+      try {
+         data = URLEncoder.encode( "licenseCode", "UTF-8" ) + "=" + URLEncoder.encode( vendor, "UTF-8" );
+         data += "&" + URLEncoder.encode( "user", "UTF-8" ) + "=" + URLEncoder.encode( username, "UTF-8" );
+         data += "&" + URLEncoder.encode( "password", "UTF-8" ) + "=" + URLEncoder.encode( password, "UTF-8" );
+      } catch ( UnsupportedEncodingException unseE ) {
+         LOGGER.error( "Could not encode URL for " + username + " with vendor license " + vendor );
+         return false;
+      }
+      try {
+         final URL url = new URL( umlsaddr );
+         final URLConnection connection = url.openConnection();
+         connection.setDoOutput( true );
+         final OutputStreamWriter writer = new OutputStreamWriter( connection.getOutputStream() );
+         writer.write( data );
+         writer.flush();
+         boolean result = false;
+         final BufferedReader reader = new BufferedReader( new InputStreamReader( connection.getInputStream() ) );
+         String line;
+         while ( (line = reader.readLine()) != null ) {
+            final String trimline = line.trim();
+            if ( trimline.isEmpty() ) {
+               break;
+            }
+            result = trimline.equalsIgnoreCase( "<Result>true</Result>" );
+         }
+         writer.close();
+         reader.close();
+         return result;
+      } catch ( IOException ioE ) {
+         LOGGER.error( ioE.getMessage() );
+         return false;
+      }
+   }
+
+
+}

Propchange: ctakes/sandbox/ctakes-dictionary-lookup2/src/main/java/org/apache/ctakes/dictionary/lookup2/util/UmlsUserApprover.java
------------------------------------------------------------------------------
    svn:eol-style = native