You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2012/03/13 13:53:48 UTC

svn commit: r1300107 - in /opennlp/sandbox: corpus-server-tools/src/main/java/org/apache/opennlp/corpus_server/tools/ corpus-server/src/main/java/org/apache/opennlp/corpus_server/ corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/ cor...

Author: joern
Date: Tue Mar 13 12:53:48 2012
New Revision: 1300107

URL: http://svn.apache.org/viewvc?rev=1300107&view=rev
Log:
OPENNLP-340 Added support to remove a CAS from a corpus.

Added:
    opennlp/sandbox/corpus-server-tools/src/main/java/org/apache/opennlp/corpus_server/tools/RemoveCAS.java   (with props)
Modified:
    opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusResource.java
    opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusServer.java
    opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneIndexer.java
    opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneSearchService.java
    opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/SearchService.java
    opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorporaChangeListener.java
    opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorpusStore.java
    opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/DerbyCorpusStore.java
    opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/MemoryCorpusStore.java

Added: opennlp/sandbox/corpus-server-tools/src/main/java/org/apache/opennlp/corpus_server/tools/RemoveCAS.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server-tools/src/main/java/org/apache/opennlp/corpus_server/tools/RemoveCAS.java?rev=1300107&view=auto
==============================================================================
--- opennlp/sandbox/corpus-server-tools/src/main/java/org/apache/opennlp/corpus_server/tools/RemoveCAS.java (added)
+++ opennlp/sandbox/corpus-server-tools/src/main/java/org/apache/opennlp/corpus_server/tools/RemoveCAS.java Tue Mar 13 12:53:48 2012
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.opennlp.corpus_server.tools;
+
+import javax.ws.rs.core.MediaType;
+
+import com.sun.jersey.api.client.Client;
+import com.sun.jersey.api.client.ClientResponse;
+import com.sun.jersey.api.client.WebResource;
+
+/**
+ * Command Line Tool to remove a CAS from a corpus.
+ */
+public class RemoveCAS {
+
+  /**
+   * Sends an HTTP DELETE request for a CAS to the corpus server and prints
+   * the HTTP status code of the response.
+   * <p>
+   * The process exits with -1 when the arguments are invalid and with 1
+   * when the server answers with a non 2xx status code.
+   *
+   * @param args the corpus address followed by the id of the CAS to remove
+   */
+  public static void main(String[] args) {
+
+    if (args.length != 2) {
+      System.out.println("RemoveCAS corpusAddress casId");
+      System.exit(-1);
+    }
+
+    int status;
+
+    Client c = Client.create();
+
+    try {
+      WebResource r = c.resource(args[0]);
+
+      ClientResponse response = r
+          .path(args[1])
+          .delete(ClientResponse.class);
+
+      try {
+        status = response.getStatus();
+      } finally {
+        // Release the connection which is held by the response
+        response.close();
+      }
+    } finally {
+      // Free the resources which are held by the client
+      c.destroy();
+    }
+
+    System.out.println("Result: " + status);
+
+    // Report a failed removal to the caller, System.exit is called after
+    // the cleanup because it would skip the finally blocks
+    if (status < 200 || status >= 300) {
+      System.exit(1);
+    }
+  }
+
+}

Propchange: opennlp/sandbox/corpus-server-tools/src/main/java/org/apache/opennlp/corpus_server/tools/RemoveCAS.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusResource.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusResource.java?rev=1300107&r1=1300106&r2=1300107&view=diff
==============================================================================
--- opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusResource.java (original)
+++ opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusResource.java Tue Mar 13 12:53:48 2012
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.util.List;
 
 import javax.ws.rs.Consumes;
+import javax.ws.rs.DELETE;
 import javax.ws.rs.GET;
 import javax.ws.rs.POST;
 import javax.ws.rs.PUT;
@@ -62,7 +63,7 @@ public class CorpusResource {
 	/**
 	 * Updates an existing CAS in the store.
 	 */
-	// TODO: Should fail is resource does not exist
+	// TODO: Should fail if resource does not exist
 	@PUT
 	@Consumes(MediaType.TEXT_XML)
 	@Path("{casId}")
@@ -71,6 +72,13 @@ public class CorpusResource {
 		corpus.updateCAS(casId, cas);
 	}
 	
+	/**
+	 * Removes the CAS with the given id from the corpus by delegating
+	 * to the corpus store.
+	 *
+	 * @param casId the id of the CAS, taken from the request path
+	 * @throws IOException if the corpus store fails to remove the CAS
+	 */
+	@DELETE
+	@Path("{casId}")
+	public void removeCAS(@PathParam("casId") String casId)
+	    throws IOException {
+	  corpus.removeCAS(casId);
+	}
+	
 	/**
 	 * Retrieves an existing CAS form the store.
 	 * @param casId

Modified: opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusServer.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusServer.java?rev=1300107&r1=1300106&r2=1300107&view=diff
==============================================================================
--- opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusServer.java (original)
+++ opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusServer.java Tue Mar 13 12:53:48 2012
@@ -66,6 +66,16 @@ public class CorpusServer implements Ser
         LOGGER.log(Level.WARNING, "Failed to create index: " + store.getCorpusId(), e);
       }
     }
+    
+    /**
+     * Removes the CAS from the search index after it was removed from
+     * the corpus. A failure is logged as a warning and not propagated
+     * to the caller.
+     *
+     * @param store the corpus the CAS was removed from
+     * @param casId the id of the removed CAS
+     */
+    @Override
+    public void removedCAS(CorpusStore store, String casId) {
+      try {
+        searchService.removeFromIndex(store, casId);
+      } catch (IOException e) {
+        // The cas id and the corpus id must be separated by spaces from
+        // the surrounding text to keep the message readable
+        LOGGER.log(Level.WARNING, "Failed to remove cas " + casId + 
+            " from index " + store.getCorpusId(), e);
+      }
+    }
   }
   
   private final static Logger LOGGER = Logger.getLogger(

Modified: opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneIndexer.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneIndexer.java?rev=1300107&r1=1300106&r2=1300107&view=diff
==============================================================================
--- opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneIndexer.java (original)
+++ opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneIndexer.java Tue Mar 13 12:53:48 2012
@@ -92,14 +92,20 @@ public class LuceneIndexer extends Lucen
       throw new AnalysisEngineProcessException(new Exception("Missing cas id feature structure!"));
     
     Query idQuery = new TermQuery(new Term(LuceneSearchService.LUCENE_ID_FIELD, casId));
-      
-    Document doc = createDocument(cas);
-    doc.add(new Field(LuceneSearchService.LUCENE_ID_FIELD,
-        casId, Field.Store.YES, Field.Index.NOT_ANALYZED));
+    
+    // Note: A CAS with a null document text is removed from the index.
+    // This is used to remove a CAS from an index!
     
     try {
       indexWriter.deleteDocuments(idQuery);
-      indexWriter.addDocument(doc);
+      
+      if (cas.getDocumentText() != null) {
+        Document doc = createDocument(cas);
+        doc.add(new Field(LuceneSearchService.LUCENE_ID_FIELD,
+            casId, Field.Store.YES, Field.Index.NOT_ANALYZED));
+        
+        indexWriter.addDocument(doc);
+      }
       
       // TODO: Commit handling might need to be changed
       indexWriter.commit();

Modified: opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneSearchService.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneSearchService.java?rev=1300107&r1=1300106&r2=1300107&view=diff
==============================================================================
--- opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneSearchService.java (original)
+++ opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneSearchService.java Tue Mar 13 12:53:48 2012
@@ -34,12 +34,14 @@ import java.util.logging.Logger;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.Version;
@@ -224,7 +226,6 @@ public class LuceneSearchService impleme
     
     AnalysisEngine indexer = corpusIndexerMap.get(corpusId);
     
-    
     InputStream indexTsIn = LuceneSearchService.class.getResourceAsStream(
         "/org/apache/opennlp/corpus_server/search/TypeSystem.xml");
     
@@ -250,7 +251,12 @@ public class LuceneSearchService impleme
     
     byte[] casBytes = store.getCAS(casId);
     
-    UimaUtil.deserializeXmiCAS(cas, new ByteArrayInputStream(casBytes));
+    if (casBytes != null) {
+      UimaUtil.deserializeXmiCAS(cas, new ByteArrayInputStream(casBytes));
+    }
+    else {
+      cas.setDocumentText(null);
+    }
     
     // Inject id feature structure into the CAS
     Type casIdType = cas.getTypeSystem().getType(LuceneIndexer.CAS_ID_TYPE);
@@ -268,6 +274,12 @@ public class LuceneSearchService impleme
   }
 
   @Override
+  public void removeFromIndex(CorpusStore store, String casId)
+      throws IOException {
+    // The removal is done by re-indexing the CAS. When the store returns
+    // no content for the cas id the indexer receives a CAS with a null
+    // document text, it then deletes the index entry and adds no new one.
+    // NOTE(review): this presumably requires that the CAS was already
+    // removed from the store when this method is called, confirm.
+    index(store, casId);
+  }
+  
+  @Override
   public synchronized List<String> search(CorpusStore store, String q)
       throws IOException {
     

Modified: opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/SearchService.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/SearchService.java?rev=1300107&r1=1300106&r2=1300107&view=diff
==============================================================================
--- opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/SearchService.java (original)
+++ opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/SearchService.java Tue Mar 13 12:53:48 2012
@@ -25,15 +25,55 @@ import org.apache.opennlp.corpus_server.
 
 public interface SearchService {
 
+  /**
+   * Initializes the search service, called before any other method.
+   * 
+   * @param corporaStore
+   * @throws IOException
+   */
   void initialize(CorporaStore corporaStore) throws IOException;
   
-  // index
+  /**
+   * Creates the index for the given corpus.
+   * 
+   * @param store
+   * @throws IOException
+   */
+  void createIndex(CorpusStore store) throws IOException;
+  
+  /**
+   * Indexes or re-indexes a CAS.
+   * 
+   * @param store
+   * @param casId
+   * @throws IOException
+   */
   void index(CorpusStore store, String casId) throws IOException;
   
-  List<String> search(CorpusStore store, String q) throws IOException;
+  /**
+   * Removes the CAS from the index.
+   * 
+   * @param store
+   * @param casId
+   * @throws IOException
+   */
+  void removeFromIndex(CorpusStore store, String casId) throws IOException;
   
-  void shutdown() throws IOException;
+  /**
+   * Search for CASes matching the given query in the given corpus.
+   * 
+   * @param store
+   * @param q
+   * @return
+   * @throws IOException
+   */
+  List<String> search(CorpusStore store, String q) throws IOException;
 
-  void createIndex(CorpusStore store) throws IOException;
-  
+  /**
+   * Shuts down the search service. No method other than initialize
+   * may be called after this.
+   * 
+   * @throws IOException
+   */
+  void shutdown() throws IOException;
 }

Modified: opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorporaChangeListener.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorporaChangeListener.java?rev=1300107&r1=1300106&r2=1300107&view=diff
==============================================================================
--- opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorporaChangeListener.java (original)
+++ opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorporaChangeListener.java Tue Mar 13 12:53:48 2012
@@ -33,10 +33,18 @@ public interface CorporaChangeListener {
   void addedCAS(CorpusStore store, String casId);
   
   /**
-   * Indicates that a CAS was removed from the corpus.
+   * Indicates that a CAS was updated in the corpus.
    * 
    * @param store
    * @param casId
    */
   void updatedCAS(CorpusStore store, String casId);
+  
+  /**
+   * Indicates that a CAS was removed from the corpus.
+   * 
+   * @param store
+   * @param casId
+   */
+  void removedCAS(CorpusStore store, String casId);
 }

Modified: opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorpusStore.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorpusStore.java?rev=1300107&r1=1300106&r2=1300107&view=diff
==============================================================================
--- opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorpusStore.java (original)
+++ opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorpusStore.java Tue Mar 13 12:53:48 2012
@@ -27,15 +27,65 @@ import org.apache.uima.resource.metadata
  */
 public interface CorpusStore {
   
+  /**
+   * Retrieves the unique id of the corpus.
+   * 
+   * @return the corpus id
+   */
   String getCorpusId();
   
+  /**
+   * Retrieves a CAS for a given id.
+   * 
+   * @param casId the id of the CAS to retrieve
+   * @return the serialized CAS; the search service treats a null result
+   *     as a removed CAS (NOTE(review): confirm this for all stores)
+   * 
+   * @throws IOException if retrieving the CAS is not possible
+   */
   byte[] getCAS(String casId) throws IOException;
   
+  /**
+   * Adds a CAS to the corpus with the given id. 
+   * 
+   * @param casID the id of the new CAS
+   * @param content the CAS in the XMI format
+   * 
+   * @throws IOException if storing the CAS is not possible
+   */
   void addCAS(String casID, byte[] content) throws IOException;
   
+  /**
+   * Updates the XMI content of an existing CAS.
+   * 
+   * @param casID the id of the CAS to update
+   * @param content the new content
+   * 
+   * @throws IOException if updating the CAS fails
+   */
   void updateCAS(String casID, byte[] content) throws IOException;
   
+  /**
+   * Removes a CAS of the given id from the corpus.
+   * 
+   * @param casID the id of the CAS to remove
+   * @throws IOException if removing the CAS fails
+   */
+  void removeCAS(String casID) throws IOException;
+  
+  /**
+   * Retrieves the type system description of this corpus.
+   * 
+   * @return the type system description
+   * @throws IOException if the type system cannot be retrieved
+   */
   TypeSystemDescription getTypeSystem() throws IOException;
   
+  /**
+   * Retrieves the index mapping for this corpus.
+   * 
+   * @return the index mapping as a byte array
+   * @throws IOException if the index mapping cannot be retrieved
+   */
   byte[] getIndexMapping() throws IOException;
 }

Modified: opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/DerbyCorpusStore.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/DerbyCorpusStore.java?rev=1300107&r1=1300106&r2=1300107&view=diff
==============================================================================
--- opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/DerbyCorpusStore.java (original)
+++ opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/DerbyCorpusStore.java Tue Mar 13 12:53:48 2012
@@ -158,6 +158,37 @@ public class DerbyCorpusStore implements
   }
   
   @Override
+  public void removeCAS(String casID) throws IOException {
+    
+    // Deletes the row of the CAS from the corpus table and afterwards
+    // notifies the registered listeners about the removal.
+    
+    try {
+      Connection conn = dataSource.getConnection();
+      
+      try {
+        PreparedStatement ps = conn.prepareStatement("delete from " + 
+            corpusName + " where name = ?");
+        
+        try {
+          ps.setString(1, casID);
+          
+          ps.executeUpdate();
+          
+          conn.commit();
+        } finally {
+          // Always release the statement, also if the delete failed
+          ps.close();
+        }
+      } finally {
+        // Always give the connection back, also if the delete failed
+        conn.close();
+      }
+    } catch (SQLException e) {
+      
+      if (LOGGER.isLoggable(Level.SEVERE)) {
+        LOGGER.log(Level.SEVERE, "Failed to remove CAS: " + 
+            casID, e);
+      }
+      
+      throw new IOException(e);
+    }
+    
+    // Listeners are only notified when the delete did not fail
+    for (CorporaChangeListener listener : store.getListeners()) {
+      listener.removedCAS(this, casID);
+    }
+  }
+  
+  @Override
   public TypeSystemDescription getTypeSystem() throws IOException {
     
     TypeSystemDescription tsDescription = null;

Modified: opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/MemoryCorpusStore.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/MemoryCorpusStore.java?rev=1300107&r1=1300106&r2=1300107&view=diff
==============================================================================
--- opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/MemoryCorpusStore.java (original)
+++ opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/MemoryCorpusStore.java Tue Mar 13 12:53:48 2012
@@ -77,6 +77,11 @@ public class MemoryCorpusStore implement
     addCAS(casID, content);
   }
 
+  @Override
+  public void removeCAS(String casID) throws IOException {
+    // Drops the entry of the given cas id from the in-memory casStore.
+    // NOTE(review): DerbyCorpusStore.removeCAS notifies the registered
+    // CorporaChangeListeners via removedCAS, no notification is sent
+    // here. Confirm that this is intended for the memory store.
+    casStore.remove(casID);
+  }
+  
   public TypeSystemDescription getTypeSystem() {
     return typeSystem;
   }