You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2012/03/13 13:53:48 UTC
svn commit: r1300107 - in /opennlp/sandbox:
corpus-server-tools/src/main/java/org/apache/opennlp/corpus_server/tools/
corpus-server/src/main/java/org/apache/opennlp/corpus_server/
corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/ cor...
Author: joern
Date: Tue Mar 13 12:53:48 2012
New Revision: 1300107
URL: http://svn.apache.org/viewvc?rev=1300107&view=rev
Log:
OPENNLP-340 Added support to remove a CAS from a corpus.
Added:
opennlp/sandbox/corpus-server-tools/src/main/java/org/apache/opennlp/corpus_server/tools/RemoveCAS.java (with props)
Modified:
opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusResource.java
opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusServer.java
opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneIndexer.java
opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneSearchService.java
opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/SearchService.java
opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorporaChangeListener.java
opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorpusStore.java
opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/DerbyCorpusStore.java
opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/MemoryCorpusStore.java
Added: opennlp/sandbox/corpus-server-tools/src/main/java/org/apache/opennlp/corpus_server/tools/RemoveCAS.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server-tools/src/main/java/org/apache/opennlp/corpus_server/tools/RemoveCAS.java?rev=1300107&view=auto
==============================================================================
--- opennlp/sandbox/corpus-server-tools/src/main/java/org/apache/opennlp/corpus_server/tools/RemoveCAS.java (added)
+++ opennlp/sandbox/corpus-server-tools/src/main/java/org/apache/opennlp/corpus_server/tools/RemoveCAS.java Tue Mar 13 12:53:48 2012
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.opennlp.corpus_server.tools;
+
+import javax.ws.rs.core.MediaType;
+
+import com.sun.jersey.api.client.Client;
+import com.sun.jersey.api.client.ClientResponse;
+import com.sun.jersey.api.client.WebResource;
+
+/**
+ * Command Line Tool to remove a CAS from a corpus.
+ */
+public class RemoveCAS {
+
+ public static void main(String[] args) {
+
+ if (args.length != 2) {
+ System.out.println("RemoveCAS corpusAddress casId");
+ System.exit(-1);
+ }
+
+ Client c = Client.create();
+
+ WebResource r = c.resource(args[0]);
+
+ ClientResponse response = r
+ .path(args[1])
+ .delete(ClientResponse.class);
+
+ System.out.println("Result: " + response.getStatus());
+ }
+
+}
Propchange: opennlp/sandbox/corpus-server-tools/src/main/java/org/apache/opennlp/corpus_server/tools/RemoveCAS.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusResource.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusResource.java?rev=1300107&r1=1300106&r2=1300107&view=diff
==============================================================================
--- opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusResource.java (original)
+++ opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusResource.java Tue Mar 13 12:53:48 2012
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.util.List;
import javax.ws.rs.Consumes;
+import javax.ws.rs.DELETE;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.PUT;
@@ -62,7 +63,7 @@ public class CorpusResource {
/**
* Updates an existing CAS in the store.
*/
- // TODO: Should fail is resource does not exist
+ // TODO: Should fail if resource does not exist
@PUT
@Consumes(MediaType.TEXT_XML)
@Path("{casId}")
@@ -71,6 +72,13 @@ public class CorpusResource {
corpus.updateCAS(casId, cas);
}
+ @DELETE
+ @Path("{casId}")
+ public void removeCAS(@PathParam("casId") String casId)
+ throws IOException {
+ corpus.removeCAS(casId);
+ }
+
/**
* Retrieves an existing CAS form the store.
* @param casId
Modified: opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusServer.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusServer.java?rev=1300107&r1=1300106&r2=1300107&view=diff
==============================================================================
--- opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusServer.java (original)
+++ opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/CorpusServer.java Tue Mar 13 12:53:48 2012
@@ -66,6 +66,16 @@ public class CorpusServer implements Ser
LOGGER.log(Level.WARNING, "Failed to create index: " + store.getCorpusId(), e);
}
}
+
+ @Override
+ public void removedCAS(CorpusStore store, String casId) {
+ try {
+ searchService.removeFromIndex(store, casId);
+ } catch (IOException e) {
+ LOGGER.log(Level.WARNING, "Failed to remove cas " + casId +
+ "from index " + store.getCorpusId(), e);
+ }
+ }
}
private final static Logger LOGGER = Logger.getLogger(
Modified: opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneIndexer.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneIndexer.java?rev=1300107&r1=1300106&r2=1300107&view=diff
==============================================================================
--- opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneIndexer.java (original)
+++ opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneIndexer.java Tue Mar 13 12:53:48 2012
@@ -92,14 +92,20 @@ public class LuceneIndexer extends Lucen
throw new AnalysisEngineProcessException(new Exception("Missing cas id feature structure!"));
Query idQuery = new TermQuery(new Term(LuceneSearchService.LUCENE_ID_FIELD, casId));
-
- Document doc = createDocument(cas);
- doc.add(new Field(LuceneSearchService.LUCENE_ID_FIELD,
- casId, Field.Store.YES, Field.Index.NOT_ANALYZED));
+
+ // Note: A CAS with a null document text is removed from the index.
+ // This is used to remove a CAS from an index!
try {
indexWriter.deleteDocuments(idQuery);
- indexWriter.addDocument(doc);
+
+ if (cas.getDocumentText() != null) {
+ Document doc = createDocument(cas);
+ doc.add(new Field(LuceneSearchService.LUCENE_ID_FIELD,
+ casId, Field.Store.YES, Field.Index.NOT_ANALYZED));
+
+ indexWriter.addDocument(doc);
+ }
// TODO: Commit handling might need to be changed
indexWriter.commit();
Modified: opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneSearchService.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneSearchService.java?rev=1300107&r1=1300106&r2=1300107&view=diff
==============================================================================
--- opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneSearchService.java (original)
+++ opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/LuceneSearchService.java Tue Mar 13 12:53:48 2012
@@ -34,12 +34,14 @@ import java.util.logging.Logger;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
@@ -224,7 +226,6 @@ public class LuceneSearchService impleme
AnalysisEngine indexer = corpusIndexerMap.get(corpusId);
-
InputStream indexTsIn = LuceneSearchService.class.getResourceAsStream(
"/org/apache/opennlp/corpus_server/search/TypeSystem.xml");
@@ -250,7 +251,12 @@ public class LuceneSearchService impleme
byte[] casBytes = store.getCAS(casId);
- UimaUtil.deserializeXmiCAS(cas, new ByteArrayInputStream(casBytes));
+ if (casBytes != null) {
+ UimaUtil.deserializeXmiCAS(cas, new ByteArrayInputStream(casBytes));
+ }
+ else {
+ cas.setDocumentText(null);
+ }
// Inject id feature structure into the CAS
Type casIdType = cas.getTypeSystem().getType(LuceneIndexer.CAS_ID_TYPE);
@@ -268,6 +274,12 @@ public class LuceneSearchService impleme
}
@Override
+ public void removeFromIndex(CorpusStore store, String casId)
+ throws IOException {
+ index(store, casId);
+ }
+
+ @Override
public synchronized List<String> search(CorpusStore store, String q)
throws IOException {
Modified: opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/SearchService.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/SearchService.java?rev=1300107&r1=1300106&r2=1300107&view=diff
==============================================================================
--- opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/SearchService.java (original)
+++ opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/search/SearchService.java Tue Mar 13 12:53:48 2012
@@ -25,15 +25,55 @@ import org.apache.opennlp.corpus_server.
public interface SearchService {
+ /**
+ * Initializes the search service, called before any other method.
+ *
+ * @param corporaStore
+ * @throws IOException
+ */
void initialize(CorporaStore corporaStore) throws IOException;
- // index
+ /**
+ * Creates the index for the given corpus.
+ *
+ * @param store
+ * @throws IOException
+ */
+ void createIndex(CorpusStore store) throws IOException;
+
+ /**
+ * Indexes or re-indexes a CAS.
+ *
+ * @param store
+ * @param casId
+ * @throws IOException
+ */
void index(CorpusStore store, String casId) throws IOException;
- List<String> search(CorpusStore store, String q) throws IOException;
+ /**
+ * Removes the CAS from the index.
+ *
+ * @param store
+ * @param casId
+ * @throws IOException
+ */
+ void removeFromIndex(CorpusStore store, String casId) throws IOException;
- void shutdown() throws IOException;
+ /**
+ * Search for CASes matching the given query in the given corpus.
+ *
+ * @param store
+ * @param q
+ * @return
+ * @throws IOException
+ */
+ List<String> search(CorpusStore store, String q) throws IOException;
- void createIndex(CorpusStore store) throws IOException;
-
+ /**
+ * Shuts down the search service. No other methods except initialize
+ * may be called after this.
+ *
+ * @throws IOException
+ */
+ void shutdown() throws IOException;
}
Modified: opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorporaChangeListener.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorporaChangeListener.java?rev=1300107&r1=1300106&r2=1300107&view=diff
==============================================================================
--- opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorporaChangeListener.java (original)
+++ opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorporaChangeListener.java Tue Mar 13 12:53:48 2012
@@ -33,10 +33,18 @@ public interface CorporaChangeListener {
void addedCAS(CorpusStore store, String casId);
/**
- * Indicates that a CAS was removed from the corpus.
+ * Indicates that a CAS was updated in the corpus.
*
* @param store
* @param casId
*/
void updatedCAS(CorpusStore store, String casId);
+
+ /**
+ * Indicates that a CAS was removed from the corpus.
+ *
+ * @param store
+ * @param casId
+ */
+ void removedCAS(CorpusStore store, String casId);
}
Modified: opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorpusStore.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorpusStore.java?rev=1300107&r1=1300106&r2=1300107&view=diff
==============================================================================
--- opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorpusStore.java (original)
+++ opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/CorpusStore.java Tue Mar 13 12:53:48 2012
@@ -27,15 +27,65 @@ import org.apache.uima.resource.metadata
*/
public interface CorpusStore {
+ /**
+ * Retrieves the unique id of the corpus.
+ *
+ * @return the corpus id
+ */
String getCorpusId();
+ /**
+ * Retrieves a CAS for a given id.
+ *
+ * @param casId the id of the CAS to retrieve
+ *
+ * @return the CAS
+ *
+ * @throws IOException if retrieving the CAS is not possible
+ */
byte[] getCAS(String casId) throws IOException;
+ /**
+ * Adds a CAS to the corpus with the given id.
+ *
+ * @param casID the id of the new CAS
+ * @param content the CAS in the XMI format
+ *
+ * @throws IOException if storing the CAS is not possible
+ */
void addCAS(String casID, byte[] content) throws IOException;
+ /**
+ * Updates the XMI content of an existing CAS.
+ *
+ * @param casID the id of the CAS to update
+ * @param content the new content
+ *
+ * @throws IOException if updating the CAS fails
+ */
void updateCAS(String casID, byte[] content) throws IOException;
+ /**
+ * Removes a CAS of the given id from the corpus.
+ *
+ * @param casID
+ * @throws IOException
+ */
+ void removeCAS(String casID) throws IOException;
+
+ /**
+ * Retrieves the type system description of this corpus.
+ *
+ * @return
+ * @throws IOException
+ */
TypeSystemDescription getTypeSystem() throws IOException;
+ /**
+ * Retrieves the index mapping for this corpus.
+ *
+ * @return
+ * @throws IOException
+ */
byte[] getIndexMapping() throws IOException;
}
Modified: opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/DerbyCorpusStore.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/DerbyCorpusStore.java?rev=1300107&r1=1300106&r2=1300107&view=diff
==============================================================================
--- opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/DerbyCorpusStore.java (original)
+++ opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/DerbyCorpusStore.java Tue Mar 13 12:53:48 2012
@@ -158,6 +158,37 @@ public class DerbyCorpusStore implements
}
@Override
+ public void removeCAS(String casID) throws IOException {
+
+ try {
+ Connection conn = dataSource.getConnection();
+ PreparedStatement ps = conn.prepareStatement("delete from " +
+ corpusName + " where name = ?");
+
+ ps.setString(1, casID);
+
+ ps.executeUpdate();
+
+ conn.commit();
+
+ ps.close();
+ conn.close();
+ } catch (SQLException e) {
+
+ if (LOGGER.isLoggable(Level.SEVERE)) {
+ LOGGER.log(Level.SEVERE, "Failed to remove CAS: " +
+ casID, e);
+ }
+
+ throw new IOException(e);
+ }
+
+ for (CorporaChangeListener listener : store.getListeners()) {
+ listener.removedCAS(this, casID);
+ }
+ }
+
+ @Override
public TypeSystemDescription getTypeSystem() throws IOException {
TypeSystemDescription tsDescription = null;
Modified: opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/MemoryCorpusStore.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/MemoryCorpusStore.java?rev=1300107&r1=1300106&r2=1300107&view=diff
==============================================================================
--- opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/MemoryCorpusStore.java (original)
+++ opennlp/sandbox/corpus-server/src/main/java/org/apache/opennlp/corpus_server/store/MemoryCorpusStore.java Tue Mar 13 12:53:48 2012
@@ -77,6 +77,11 @@ public class MemoryCorpusStore implement
addCAS(casID, content);
}
+ @Override
+ public void removeCAS(String casID) throws IOException {
+ casStore.remove(casID);
+ }
+
public TypeSystemDescription getTypeSystem() {
return typeSystem;
}