You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ja...@apache.org on 2012/10/31 06:26:55 UTC
svn commit: r1403989 [13/28] - in /incubator/ctakes/branches/SHARPn-cTAKES:
Constituency Parser/src/org/chboston/cnlp/ctakes/parser/ Constituency
Parser/src/org/chboston/cnlp/ctakes/parser/uima/ae/ Constituency
Parser/src/org/chboston/cnlp/ctakes/parse...
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/CasConsumer.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/CasConsumer.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/CasConsumer.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/CasConsumer.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,105 +14,105 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.uima.core.cc;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.OutputStream;
-
-import org.apache.log4j.Logger;
-
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.impl.XCASSerializer;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-
-import edu.mayo.bmi.uima.core.util.DocumentIDAnnotationUtil;
-
-
-/**
- * CasConsumer that writes a JCas (the current view) to an xml file
- *
- * @author Mayo Clinic
- */
-public class CasConsumer extends CasConsumer_ImplBase
-{
- // LOG4J logger based on class name
- private Logger iv_logger = Logger.getLogger(getClass().getName());
-
- private String iv_outputDir = null;
-
- // iv_procCount is used to name the output files sequentially if there
- // is a problem with naming based on source names
- private int iv_procCount = 0;
-
-
- /**
- * Read in configuration parameters
- */
- public void initialize() throws ResourceInitializationException {
- iv_outputDir = (String) getConfigParameterValue("outputDir");
- }
-
-
- /**
- * Write a formatted xml file containing data from the view.
- * The file name will come from the DocumentID annotation,
- * which is associated with a view.
- * We append .xml to the DocumentID/filename
- */
- private void processView(JCas view) throws Exception {
- // String docText = view.getDocumentText();
-
- String docName = DocumentIDAnnotationUtil.getDocumentID(view);
-
- File outputFile;
- if (docName==null) {
- docName = "doc" + iv_procCount + ".xml";
- }
- else {
- docName = docName + ".xml";
- // if (!docName.endsWith(".xml")) {
- // docName = docName + ".xml";
- // }
- }
-
- OutputStream out=null;
- try {
- File outputDir = new File(iv_outputDir);
- outputDir.mkdirs();
- outputFile = new File(iv_outputDir + File.separatorChar + docName);
- out = new FileOutputStream(outputFile);
- XCASSerializer.serialize(view.getCas(), out, true); // true -> formats the output
- }
- finally {
- iv_procCount++;
- if (out != null) {
- out.close();
- }
- }
-
- }
-
-
- /**
- * Create an xml file from the data in the cas.
- */
- public void processCas(CAS cas) throws ResourceProcessException {
-
- iv_logger.info("Started");
-
- try {
-
- JCas currentView = cas.getCurrentView().getJCas();
- processView(currentView);
-
- } catch (Exception e) {
- throw new ResourceProcessException(e);
- }
-
- }
-
-}
\ No newline at end of file
+package edu.mayo.bmi.uima.core.cc;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStream;
+
+import org.apache.log4j.Logger;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.impl.XCASSerializer;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+
+import edu.mayo.bmi.uima.core.util.DocumentIDAnnotationUtil;
+
+
+/**
+ * CasConsumer that writes a JCas (the current view) to an xml file
+ *
+ * @author Mayo Clinic
+ */
+public class CasConsumer extends CasConsumer_ImplBase
+{
+ // LOG4J logger based on class name
+ private Logger iv_logger = Logger.getLogger(getClass().getName());
+
+ private String iv_outputDir = null;
+
+ // iv_procCount is used to name the output files sequentially if there
+ // is a problem with naming based on source names
+ private int iv_procCount = 0;
+
+
+ /**
+ * Read in configuration parameters
+ */
+ public void initialize() throws ResourceInitializationException {
+ iv_outputDir = (String) getConfigParameterValue("outputDir");
+ }
+
+
+ /**
+ * Write a formatted xml file containing data from the view.
+ * The file name will come from the DocumentID annotation,
+ * which is associated with a view.
+ * We append .xml to the DocumentID/filename
+ */
+ private void processView(JCas view) throws Exception {
+ // String docText = view.getDocumentText();
+
+ String docName = DocumentIDAnnotationUtil.getDocumentID(view);
+
+ File outputFile;
+ if (docName==null) {
+ docName = "doc" + iv_procCount + ".xml";
+ }
+ else {
+ docName = docName + ".xml";
+ // if (!docName.endsWith(".xml")) {
+ // docName = docName + ".xml";
+ // }
+ }
+
+ OutputStream out=null;
+ try {
+ File outputDir = new File(iv_outputDir);
+ outputDir.mkdirs();
+ outputFile = new File(iv_outputDir + File.separatorChar + docName);
+ out = new FileOutputStream(outputFile);
+ XCASSerializer.serialize(view.getCas(), out, true); // true -> formats the output
+ }
+ finally {
+ iv_procCount++;
+ if (out != null) {
+ out.close();
+ }
+ }
+
+ }
+
+
+ /**
+ * Create an xml file from the data in the cas.
+ */
+ public void processCas(CAS cas) throws ResourceProcessException {
+
+ iv_logger.info("Started");
+
+ try {
+
+ JCas currentView = cas.getCurrentView().getJCas();
+ processView(currentView);
+
+ } catch (Exception e) {
+ throw new ResourceProcessException(e);
+ }
+
+ }
+
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/FilesInDirectoryCasConsumer.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/FilesInDirectoryCasConsumer.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/FilesInDirectoryCasConsumer.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/FilesInDirectoryCasConsumer.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,95 +14,95 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.uima.core.cc;
-
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-
-import org.apache.uima.cas.CAS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-
-import edu.mayo.bmi.uima.core.util.DocumentIDAnnotationUtil;
-
-/**
- * For each CAS a local file with the document text is written to a directory specifed by a parameter.
- * This CAS consumer does not make use of any annotation information in the cas except for the document
- * id specified the CommonTypeSystem.xml descriptor. The document id will be the name of the file written
- * for each CAS.
- *
- * This CAS consumer may be useful if you want to write the results of a collection reader and/or CAS
- * initializer to the local file system. For example, a JDBC Collection Reader may read XML documents
- * from a database and a specialized cas initializer may convert the XML to plain text. The
- * FilesInDirectoryCasConsumer can now be used to write the plain text to local plain text files.
- */
-
-public class FilesInDirectoryCasConsumer extends CasConsumer_ImplBase {
-
- public static final String PARAM_OUTPUTDIR = "OutputDirectory";
-
- File iv_outputDirectory;
-
- public void initialize() throws ResourceInitializationException
- {
- String outputDirectoryName = (String)getConfigParameterValue(PARAM_OUTPUTDIR);
- iv_outputDirectory = new File(outputDirectoryName);
- if(!iv_outputDirectory.exists() || !iv_outputDirectory.isDirectory())
- throw new ResourceInitializationException(
- new Exception("Parameter setting 'OutputDirectory' does not point to an existing directory."));
- }
-
- public void processCas(CAS cas) throws ResourceProcessException
- {
- try
- {
- JCas jcas;
- jcas = cas.getJCas();
- // jcas = cas.getJCas().getView("_InitialView");
- // jcas = cas.getJCas().getView("plaintext");
-
- String documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
- String documentText = jcas.getDocumentText();
-
- if (documentID==null) {
-
- jcas = cas.getJCas().getView("_InitialView");
- documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
-
- if (documentID==null) {
-
- jcas = cas.getJCas().getView("plaintext");
- documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
-
- if (documentID==null) {
- documentID = "doc_"+new java.util.Date().getTime()+".xml"; // use timestamp in name: doc_TIMESTAMP.xml
- System.err.println("Unable to find DocumentIDAnnotation, using " + documentID);
- }
- }
-
- }
-
- writeToFile(documentID, documentText);
-
- }
- catch(Exception e)
- {
- throw new ResourceProcessException(e);
- }
- }
-
- private void writeToFile(String documentID, String documentText) throws IOException
- {
- File outputFile = new File(iv_outputDirectory, documentID);
- outputFile.createNewFile();
- OutputStream out = new BufferedOutputStream(new FileOutputStream(outputFile));
- out.write(documentText.getBytes());
- out.flush();
- out.close();
- }
-}
+package edu.mayo.bmi.uima.core.cc;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+
+import edu.mayo.bmi.uima.core.util.DocumentIDAnnotationUtil;
+
+/**
+ * For each CAS a local file with the document text is written to a directory specified by a parameter.
+ * This CAS consumer does not make use of any annotation information in the cas except for the document
+ * id specified in the CommonTypeSystem.xml descriptor. The document id will be the name of the file written
+ * for each CAS.
+ *
+ * This CAS consumer may be useful if you want to write the results of a collection reader and/or CAS
+ * initializer to the local file system. For example, a JDBC Collection Reader may read XML documents
+ * from a database and a specialized cas initializer may convert the XML to plain text. The
+ * FilesInDirectoryCasConsumer can now be used to write the plain text to local plain text files.
+ */
+
+public class FilesInDirectoryCasConsumer extends CasConsumer_ImplBase {
+
+ public static final String PARAM_OUTPUTDIR = "OutputDirectory";
+
+ File iv_outputDirectory;
+
+ public void initialize() throws ResourceInitializationException
+ {
+ String outputDirectoryName = (String)getConfigParameterValue(PARAM_OUTPUTDIR);
+ iv_outputDirectory = new File(outputDirectoryName);
+ if(!iv_outputDirectory.exists() || !iv_outputDirectory.isDirectory())
+ throw new ResourceInitializationException(
+ new Exception("Parameter setting 'OutputDirectory' does not point to an existing directory."));
+ }
+
+ public void processCas(CAS cas) throws ResourceProcessException
+ {
+ try
+ {
+ JCas jcas;
+ jcas = cas.getJCas();
+ // jcas = cas.getJCas().getView("_InitialView");
+ // jcas = cas.getJCas().getView("plaintext");
+
+ String documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
+ String documentText = jcas.getDocumentText();
+
+ if (documentID==null) {
+
+ jcas = cas.getJCas().getView("_InitialView");
+ documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
+
+ if (documentID==null) {
+
+ jcas = cas.getJCas().getView("plaintext");
+ documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
+
+ if (documentID==null) {
+ documentID = "doc_"+new java.util.Date().getTime()+".xml"; // use timestamp in name: doc_TIMESTAMP.xml
+ System.err.println("Unable to find DocumentIDAnnotation, using " + documentID);
+ }
+ }
+
+ }
+
+ writeToFile(documentID, documentText);
+
+ }
+ catch(Exception e)
+ {
+ throw new ResourceProcessException(e);
+ }
+ }
+
+ private void writeToFile(String documentID, String documentText) throws IOException
+ {
+ File outputFile = new File(iv_outputDirectory, documentID);
+ outputFile.createNewFile();
+ OutputStream out = new BufferedOutputStream(new FileOutputStream(outputFile));
+ out.write(documentText.getBytes());
+ out.flush();
+ out.close();
+ }
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/HtmlTableCasConsumer.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/HtmlTableCasConsumer.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/HtmlTableCasConsumer.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/HtmlTableCasConsumer.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,397 +14,397 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.uima.core.cc;
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
-import java.util.ArrayList;
-import java.util.BitSet;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.StringTokenizer;
-
-import org.apache.uima.cas.CAS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-
-import edu.mayo.bmi.uima.core.util.JCasUtil;
-
-/**
- * Outputs an HTML table that visualizes the specified Annotation objects over
- * the document text.
- *
- * @author Mayo Clinic
- *
- */
-public class HtmlTableCasConsumer extends CasConsumer_ImplBase
-{
- private File iv_outputDir;
-
- private int iv_tableSpanType;
-
- private int[] iv_nestedAnnTypeArr;
-
- // key = annotation type (java.lang.Integer)
- // val = getter method (java.lang.reflect.Method)
- private Map iv_getterMethMap = new HashMap();
-
- private int iv_count;
-
- private String[] iv_tdStyleArr = { "I", "B" };
-
- public void initialize() throws ResourceInitializationException
- {
- try
- {
- iv_outputDir = new File(
- (String) getConfigParameterValue("OutputDir"));
-
- String classname;
- classname = (String) getConfigParameterValue("TableSpanAnnotation");
- iv_tableSpanType = JCasUtil.getType(classname);
-
- String[] strArr = (String[]) getConfigParameterValue("NestedAnnotations");
- iv_nestedAnnTypeArr = new int[strArr.length];
- for (int i = 0; i < strArr.length; i++)
- {
- StringTokenizer st = new StringTokenizer(strArr[i], "|");
- classname = st.nextToken().trim();
- iv_nestedAnnTypeArr[i] = JCasUtil.getType(classname);
-
- // if there's an extra token, it must be a getter methodname
- if (st.countTokens() == 1)
- {
- String methName = st.nextToken().trim();
- Class c = Class.forName(classname);
- Method meth = c.getMethod(methName, (Class[]) null);
- iv_getterMethMap.put(new Integer(iv_nestedAnnTypeArr[i]),
- meth);
- }
- }
-
- } catch (Exception e)
- {
- throw new ResourceInitializationException(e);
- }
- }
-
- public void processCas(CAS cas) throws ResourceProcessException
- {
- try
- {
- JCas jcas = cas.getJCas();
- StringBuffer htmlSB = new StringBuffer();
- htmlSB.append("<HTML>");
- htmlSB.append("<TITLE>?</TITLE>");
- htmlSB.append("<BODY>");
-
- Iterator tSpanItr = jcas.getJFSIndexRepository()
- .getAnnotationIndex(iv_tableSpanType).iterator();
- while (tSpanItr.hasNext())
- {
- Annotation tSpanAnn = (Annotation) tSpanItr.next();
- String tSpanText = tSpanAnn.getCoveredText();
-
- htmlSB.append("<TABLE border=1>");
- htmlSB.append("<TR bordercolor=\"white\">");
- for (int i = 0; i < tSpanText.length(); i++)
- {
- htmlSB.append("<TD width=10>");
- htmlSB.append(tSpanText.charAt(i));
- htmlSB.append("</TD>");
- }
- htmlSB.append("</TR>");
-
- int tdStyleIdx = 0;
- for (int nestIdx = 0; nestIdx < iv_nestedAnnTypeArr.length; nestIdx++)
- {
- List nestedAnnList = getAnnotations(jcas,
- iv_nestedAnnTypeArr[nestIdx], tSpanAnn.getBegin(),
- tSpanAnn.getEnd());
-
- // sort nested annotation list
- Collections.sort(nestedAnnList,
- new AnnotationLengthComparator());
-
- List annotsAtRowList = arrangeIntoRows(tSpanAnn,
- nestedAnnList);
-
- Iterator trAnnItr = annotsAtRowList.iterator();
- while (trAnnItr.hasNext())
- {
- htmlSB.append("<TR>");
- int cursor = tSpanAnn.getBegin();
- List annList = (List) trAnnItr.next();
-
- // sort annotations in this row by offset position
- Collections.sort(annList,
- new AnnotationPositionComparator());
-
- Iterator annItr = annList.iterator();
- while (annItr.hasNext())
- {
- Annotation ann = (Annotation) annItr.next();
- // account for preceeding whitespace
- int delta = ann.getBegin() - cursor;
- if (delta > 0)
- {
- htmlSB.append("<TD width=10 colspan=" + delta
- + ">");
- String whitespaceStr = "";
- for (int i = 0; i < delta; i++)
- {
- whitespaceStr += ' ';
- }
- htmlSB.append(whitespaceStr);
- htmlSB.append("</TD>");
- }
- cursor = ann.getEnd();
-
- htmlSB
- .append("<TD width=10 align=\"center\" colspan="
- + ann.getCoveredText().length()
- + ">");
- htmlSB.append("<");
- htmlSB.append(iv_tdStyleArr[tdStyleIdx]);
- htmlSB.append(">");
- htmlSB.append(getDisplayValue(
- iv_nestedAnnTypeArr[nestIdx], ann));
- htmlSB.append("</");
- htmlSB.append(iv_tdStyleArr[tdStyleIdx]);
- htmlSB.append(">");
- htmlSB.append("</TD>");
- }
- htmlSB.append("</TR>");
- }
-
- tdStyleIdx++;
- if (tdStyleIdx == iv_tdStyleArr.length)
- {
- tdStyleIdx = 0;
- }
- }
- htmlSB.append("</BR>");
- htmlSB.append("</BR>");
-
- htmlSB.append("</TABLE>");
- }
-
- htmlSB.append("</BODY>");
- htmlSB.append("</HTML>");
-
- File f = new File(iv_outputDir.getAbsolutePath() + File.separator
- + "doc" + iv_count + ".html");
- f.createNewFile();
- BufferedWriter bw = new BufferedWriter(new FileWriter(f));
- bw.write(htmlSB.toString());
- bw.close();
-
- } catch (Exception e)
- {
- throw new ResourceProcessException(e);
- }
- iv_count++;
- }
-
- /**
- * Gets a value to be displayed in table cell for the given annotation
- * object.
- *
- * @param annType
- * @param ann
- * @return
- */
- private String getDisplayValue(int annType, Annotation ann)
- throws IllegalAccessException, InvocationTargetException
- {
- Integer key = new Integer(annType);
- if (iv_getterMethMap.containsKey(key))
- {
- Method meth = (Method) iv_getterMethMap.get(key);
- Object val = meth.invoke(ann, (Object[]) null);
- if (val != null)
- {
- return String.valueOf(val);
- } else
- {
- // otherwise return empty string
- return "";
- }
- } else
- {
- String typeName = ann.getType().getShortName();
- return typeName.substring(0, typeName.indexOf("Annotation"));
- }
- }
-
- /**
- * Arranges the list of annotations into one or more rows. Each element of
- * the return List represents a row. Each row is represented as a row of
- * Annotation objects that below to that row.
- *
- * @param tSpanAnn
- * @param nestedAnnList
- * @return
- */
- private List arrangeIntoRows(Annotation tSpanAnn, List nestedAnnList)
- {
- int tSpanSize = tSpanAnn.getCoveredText().length();
- List maskAtRowList = new ArrayList();
- maskAtRowList.add(new BitSet(tSpanSize));
-
- List annotsAtRowList = new ArrayList();
-
- // divide parse annotations into rows
- while (nestedAnnList.size() != 0)
- {
- // pop annotation off
- Annotation ann = (Annotation) nestedAnnList.remove(0);
-
- BitSet annBitSet = new BitSet(tSpanSize);
- annBitSet.set(ann.getBegin() - tSpanAnn.getBegin(), ann.getEnd()
- - tSpanAnn.getBegin());
-
- // figure out which TR to place it in
- int idx = 0;
- boolean rowFound = false;
- while (!rowFound)
- {
- BitSet trBitSet = (BitSet) maskAtRowList.get(idx);
-
- // interset BitSets to determine if annotation will fit
- // in this row
- while (trBitSet.intersects(annBitSet))
- {
- idx++;
- if ((idx + 1) > maskAtRowList.size())
- {
- trBitSet = new BitSet(tSpanSize);
- maskAtRowList.add(trBitSet);
- } else
- {
- trBitSet = (BitSet) maskAtRowList.get(idx);
- }
- }
- trBitSet.or(annBitSet);
- rowFound = true;
- }
-
- List annList = null;
- if ((idx + 1) > annotsAtRowList.size())
- {
- annList = new ArrayList();
- annList.add(ann);
- annotsAtRowList.add(annList);
- } else
- {
- annList = (List) annotsAtRowList.get(idx);
- annList.add(ann);
- }
- }
- return annotsAtRowList;
- }
-
- /**
- * Comparator for comparing two Annotation objects based on span length.
- *
- * @author Mayo Clinic
- *
- */
- class AnnotationLengthComparator implements Comparator
- {
- /*
- * (non-Javadoc)
- *
- * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
- */
- public int compare(Object o1, Object o2)
- {
- Annotation a1 = (Annotation) o1;
- Annotation a2 = (Annotation) o2;
-
- Integer len1 = new Integer(a1.getCoveredText().length());
- Integer len2 = new Integer(a2.getCoveredText().length());
-
- if (len1.equals(len2))
- {
- if (a1.getBegin() < a2.getBegin())
- return -1;
- else if (a1.getBegin() > a2.getBegin())
- return 1;
- else
- {
- if (a1.getEnd() < a2.getEnd())
- return 1;
- else if (a1.getEnd() > a2.getEnd())
- return -1;
- else
- return 0;
- }
- } else
- {
- return len1.compareTo(len2);
- }
- }
- }
-
- /**
- * Comparator for comparing two Annotation objects based on offset position.
- *
- * @author Mayo Clinic
- *
- */
- class AnnotationPositionComparator implements Comparator
- {
- /*
- * (non-Javadoc)
- *
- * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
- */
- public int compare(Object o1, Object o2)
- {
- Annotation a1 = (Annotation) o1;
- Annotation a2 = (Annotation) o2;
-
- if (a1.getBegin() < a2.getBegin())
- return -1;
- else if (a1.getBegin() > a2.getBegin())
- return 1;
- else
- {
- if (a1.getEnd() < a2.getEnd())
- return 1;
- else if (a1.getEnd() > a2.getEnd())
- return -1;
- else
- return 0;
- }
- }
- }
-
- private List getAnnotations(JCas jcas, int annType, int begin, int end)
- {
- List list = new ArrayList();
- Iterator itr = jcas.getJFSIndexRepository().getAnnotationIndex(annType)
- .iterator();
- while (itr.hasNext())
- {
- Annotation ann = (Annotation) itr.next();
- if ((ann.getBegin() >= begin) && (ann.getEnd() <= end))
- {
- list.add(ann);
- }
- }
- return list;
- }
-}
+package edu.mayo.bmi.uima.core.cc;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.StringTokenizer;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+
+import edu.mayo.bmi.uima.core.util.JCasUtil;
+
+/**
+ * Outputs an HTML table that visualizes the specified Annotation objects over
+ * the document text.
+ *
+ * @author Mayo Clinic
+ *
+ */
+public class HtmlTableCasConsumer extends CasConsumer_ImplBase
+{
+ private File iv_outputDir;
+
+ private int iv_tableSpanType;
+
+ private int[] iv_nestedAnnTypeArr;
+
+ // key = annotation type (java.lang.Integer)
+ // val = getter method (java.lang.reflect.Method)
+ private Map iv_getterMethMap = new HashMap();
+
+ private int iv_count;
+
+ private String[] iv_tdStyleArr = { "I", "B" };
+
+ public void initialize() throws ResourceInitializationException
+ {
+ try
+ {
+ iv_outputDir = new File(
+ (String) getConfigParameterValue("OutputDir"));
+
+ String classname;
+ classname = (String) getConfigParameterValue("TableSpanAnnotation");
+ iv_tableSpanType = JCasUtil.getType(classname);
+
+ String[] strArr = (String[]) getConfigParameterValue("NestedAnnotations");
+ iv_nestedAnnTypeArr = new int[strArr.length];
+ for (int i = 0; i < strArr.length; i++)
+ {
+ StringTokenizer st = new StringTokenizer(strArr[i], "|");
+ classname = st.nextToken().trim();
+ iv_nestedAnnTypeArr[i] = JCasUtil.getType(classname);
+
+ // if there's an extra token, it must be a getter methodname
+ if (st.countTokens() == 1)
+ {
+ String methName = st.nextToken().trim();
+ Class c = Class.forName(classname);
+ Method meth = c.getMethod(methName, (Class[]) null);
+ iv_getterMethMap.put(new Integer(iv_nestedAnnTypeArr[i]),
+ meth);
+ }
+ }
+
+ } catch (Exception e)
+ {
+ throw new ResourceInitializationException(e);
+ }
+ }
+
+ public void processCas(CAS cas) throws ResourceProcessException
+ {
+ try
+ {
+ JCas jcas = cas.getJCas();
+ StringBuffer htmlSB = new StringBuffer();
+ htmlSB.append("<HTML>");
+ htmlSB.append("<TITLE>?</TITLE>");
+ htmlSB.append("<BODY>");
+
+ Iterator tSpanItr = jcas.getJFSIndexRepository()
+ .getAnnotationIndex(iv_tableSpanType).iterator();
+ while (tSpanItr.hasNext())
+ {
+ Annotation tSpanAnn = (Annotation) tSpanItr.next();
+ String tSpanText = tSpanAnn.getCoveredText();
+
+ htmlSB.append("<TABLE border=1>");
+ htmlSB.append("<TR bordercolor=\"white\">");
+ for (int i = 0; i < tSpanText.length(); i++)
+ {
+ htmlSB.append("<TD width=10>");
+ htmlSB.append(tSpanText.charAt(i));
+ htmlSB.append("</TD>");
+ }
+ htmlSB.append("</TR>");
+
+ int tdStyleIdx = 0;
+ for (int nestIdx = 0; nestIdx < iv_nestedAnnTypeArr.length; nestIdx++)
+ {
+ List nestedAnnList = getAnnotations(jcas,
+ iv_nestedAnnTypeArr[nestIdx], tSpanAnn.getBegin(),
+ tSpanAnn.getEnd());
+
+ // sort nested annotation list
+ Collections.sort(nestedAnnList,
+ new AnnotationLengthComparator());
+
+ List annotsAtRowList = arrangeIntoRows(tSpanAnn,
+ nestedAnnList);
+
+ Iterator trAnnItr = annotsAtRowList.iterator();
+ while (trAnnItr.hasNext())
+ {
+ htmlSB.append("<TR>");
+ int cursor = tSpanAnn.getBegin();
+ List annList = (List) trAnnItr.next();
+
+ // sort annotations in this row by offset position
+ Collections.sort(annList,
+ new AnnotationPositionComparator());
+
+ Iterator annItr = annList.iterator();
+ while (annItr.hasNext())
+ {
+ Annotation ann = (Annotation) annItr.next();
+ // account for preceding whitespace
+ int delta = ann.getBegin() - cursor;
+ if (delta > 0)
+ {
+ htmlSB.append("<TD width=10 colspan=" + delta
+ + ">");
+ String whitespaceStr = "";
+ for (int i = 0; i < delta; i++)
+ {
+ whitespaceStr += ' ';
+ }
+ htmlSB.append(whitespaceStr);
+ htmlSB.append("</TD>");
+ }
+ cursor = ann.getEnd();
+
+ htmlSB
+ .append("<TD width=10 align=\"center\" colspan="
+ + ann.getCoveredText().length()
+ + ">");
+ htmlSB.append("<");
+ htmlSB.append(iv_tdStyleArr[tdStyleIdx]);
+ htmlSB.append(">");
+ htmlSB.append(getDisplayValue(
+ iv_nestedAnnTypeArr[nestIdx], ann));
+ htmlSB.append("</");
+ htmlSB.append(iv_tdStyleArr[tdStyleIdx]);
+ htmlSB.append(">");
+ htmlSB.append("</TD>");
+ }
+ htmlSB.append("</TR>");
+ }
+
+ tdStyleIdx++;
+ if (tdStyleIdx == iv_tdStyleArr.length)
+ {
+ tdStyleIdx = 0;
+ }
+ }
+ htmlSB.append("</BR>");
+ htmlSB.append("</BR>");
+
+ htmlSB.append("</TABLE>");
+ }
+
+ htmlSB.append("</BODY>");
+ htmlSB.append("</HTML>");
+
+ File f = new File(iv_outputDir.getAbsolutePath() + File.separator
+ + "doc" + iv_count + ".html");
+ f.createNewFile();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(f));
+ bw.write(htmlSB.toString());
+ bw.close();
+
+ } catch (Exception e)
+ {
+ throw new ResourceProcessException(e);
+ }
+ iv_count++;
+ }
+
+ /**
+  * Gets the value to be displayed in a table cell for the given annotation.
+  * <p>
+  * When iv_getterMethMap holds an entry for the annotation type, the mapped
+  * method is invoked reflectively (with no arguments) on the annotation and
+  * its result is rendered via String.valueOf; a null result is rendered as
+  * an empty string. Otherwise the annotation's short type name is used,
+  * cut off just before the first occurrence of "Annotation". A short type
+  * name that does not contain "Annotation" is returned unchanged.
+  *
+  * @param annType
+  *            type id of the annotation; used as the (Integer) key into
+  *            iv_getterMethMap
+  * @param ann
+  *            annotation whose display value is wanted
+  * @return text to place in the table cell
+  * @throws IllegalAccessException
+  *             if the mapped getter method cannot be accessed
+  * @throws InvocationTargetException
+  *             if the mapped getter method itself throws
+  */
+ private String getDisplayValue(int annType, Annotation ann)
+         throws IllegalAccessException, InvocationTargetException
+ {
+     Integer key = Integer.valueOf(annType);
+     if (iv_getterMethMap.containsKey(key))
+     {
+         Method meth = (Method) iv_getterMethMap.get(key);
+         Object val = meth.invoke(ann, (Object[]) null);
+         // a null getter result is shown as an empty cell
+         return (val != null) ? String.valueOf(val) : "";
+     }
+
+     String typeName = ann.getType().getShortName();
+     int suffixIdx = typeName.indexOf("Annotation");
+     // indexOf returns -1 when the suffix is absent; substring(0, -1) would
+     // throw StringIndexOutOfBoundsException, so fall back to the full name
+     return (suffixIdx >= 0) ? typeName.substring(0, suffixIdx) : typeName;
+ }
+
+ /**
+  * Distributes the given annotations over one or more table rows so that no
+  * two annotations in the same row overlap. Annotations are taken from the
+  * front of the list one at a time and placed in the first row whose
+  * already-occupied character positions do not intersect the annotation's
+  * own positions; a new row is opened when no existing row fits.
+  * <p>
+  * Note that the supplied list is consumed: it is empty when this method
+  * returns.
+  *
+  * @param tSpanAnn
+  *            annotation for the table span; offsets are taken relative to
+  *            its begin offset
+  * @param nestedAnnList
+  *            annotations to arrange (emptied by this method)
+  * @return List of rows, each row being a List of the Annotation objects
+  *         that belong to that row
+  */
+ private List arrangeIntoRows(Annotation tSpanAnn, List nestedAnnList)
+ {
+     final int spanLength = tSpanAnn.getCoveredText().length();
+     final int spanBegin = tSpanAnn.getBegin();
+
+     // one occupancy mask per row; row 0 always exists
+     List rowMasks = new ArrayList();
+     rowMasks.add(new BitSet(spanLength));
+
+     List rows = new ArrayList();
+
+     while (!nestedAnnList.isEmpty())
+     {
+         // take the next annotation off the front of the list
+         Annotation current = (Annotation) nestedAnnList.remove(0);
+
+         // positions (relative to the span) covered by this annotation
+         BitSet footprint = new BitSet(spanLength);
+         footprint.set(current.getBegin() - spanBegin,
+                 current.getEnd() - spanBegin);
+
+         // walk down the rows until one has room for the annotation
+         int rowIdx = 0;
+         BitSet rowMask = (BitSet) rowMasks.get(rowIdx);
+         while (rowMask.intersects(footprint))
+         {
+             rowIdx++;
+             if (rowIdx < rowMasks.size())
+             {
+                 rowMask = (BitSet) rowMasks.get(rowIdx);
+             } else
+             {
+                 rowMask = new BitSet(spanLength);
+                 rowMasks.add(rowMask);
+             }
+         }
+         // mark the annotation's positions as taken in the chosen row
+         rowMask.or(footprint);
+
+         if (rowIdx >= rows.size())
+         {
+             List freshRow = new ArrayList();
+             freshRow.add(current);
+             rows.add(freshRow);
+         } else
+         {
+             ((List) rows.get(rowIdx)).add(current);
+         }
+     }
+     return rows;
+ }
+
+ /**
+  * Comparator that orders Annotation objects by the length of their covered
+  * text, shortest first. Annotations of equal length are ordered by
+  * ascending begin offset and, for equal begin offsets, by descending end
+  * offset.
+  *
+  * @author Mayo Clinic
+  *
+  */
+ class AnnotationLengthComparator implements Comparator
+ {
+     /*
+      * (non-Javadoc)
+      *
+      * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
+      */
+     public int compare(Object o1, Object o2)
+     {
+         Annotation first = (Annotation) o1;
+         Annotation second = (Annotation) o2;
+
+         int firstLen = first.getCoveredText().length();
+         int secondLen = second.getCoveredText().length();
+
+         // primary key: covered-text length, ascending
+         if (firstLen != secondLen)
+         {
+             return (firstLen < secondLen) ? -1 : 1;
+         }
+
+         // secondary key: begin offset, ascending
+         if (first.getBegin() < second.getBegin())
+         {
+             return -1;
+         }
+         if (first.getBegin() > second.getBegin())
+         {
+             return 1;
+         }
+
+         // last key: end offset, descending
+         if (first.getEnd() < second.getEnd())
+         {
+             return 1;
+         }
+         if (first.getEnd() > second.getEnd())
+         {
+             return -1;
+         }
+         return 0;
+     }
+ }
+
+ /**
+  * Comparator that orders Annotation objects by offset position: ascending
+  * begin offset first and, for annotations starting at the same offset,
+  * descending end offset.
+  *
+  * @author Mayo Clinic
+  *
+  */
+ class AnnotationPositionComparator implements Comparator
+ {
+     /*
+      * (non-Javadoc)
+      *
+      * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
+      */
+     public int compare(Object o1, Object o2)
+     {
+         Annotation left = (Annotation) o1;
+         Annotation right = (Annotation) o2;
+
+         // primary key: begin offset, ascending
+         if (left.getBegin() < right.getBegin())
+         {
+             return -1;
+         }
+         if (left.getBegin() > right.getBegin())
+         {
+             return 1;
+         }
+
+         // same begin offset: the annotation ending later sorts first
+         if (left.getEnd() < right.getEnd())
+         {
+             return 1;
+         }
+         if (left.getEnd() > right.getEnd())
+         {
+             return -1;
+         }
+         return 0;
+     }
+ }
+
+ /**
+  * Collects the annotations of the given type that lie entirely within the
+  * given offsets, i.e. whose begin offset is at or after <code>begin</code>
+  * and whose end offset is at or before <code>end</code>.
+  *
+  * @param jcas
+  *            JCas whose annotation index is searched
+  * @param annType
+  *            type id of the annotations to look up
+  * @param begin
+  *            lower bound (inclusive) for an annotation's begin offset
+  * @param end
+  *            upper bound (inclusive) for an annotation's end offset
+  * @return new List of matching Annotation objects, in index iteration order
+  */
+ private List getAnnotations(JCas jcas, int annType, int begin, int end)
+ {
+     List enclosed = new ArrayList();
+     for (Iterator annIt = jcas.getJFSIndexRepository()
+             .getAnnotationIndex(annType).iterator(); annIt.hasNext();)
+     {
+         Annotation candidate = (Annotation) annIt.next();
+         boolean outsideSpan = (candidate.getBegin() < begin)
+                 || (candidate.getEnd() > end);
+         if (!outsideSpan)
+         {
+             enclosed.add(candidate);
+         }
+     }
+     return enclosed;
+ }
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/NonTerminalConsumer.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/NonTerminalConsumer.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/NonTerminalConsumer.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/NonTerminalConsumer.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,24 +14,24 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.uima.core.cc;
-
-import org.apache.uima.collection.CasConsumer;
-
-/**
- * Extends the CasConsumer interface to provide a method for getting
- * output in XML form. A regular CasConsumer is "terminal" in the sense
- * that its output does not get returned to object that initiated the
- * CasConsumer. This interface allows for "non-terminal" behavior so that
- * the output can be returned.
- *
- * @author Mayo Clinic
- */
-public interface NonTerminalConsumer extends CasConsumer
-{
- /**
- * Gets the generated output from a CasConsumer in XML form.
- * @return Output xml in String form.
- */
- public String getOutputXml();
-}
\ No newline at end of file
+package edu.mayo.bmi.uima.core.cc;
+
+import org.apache.uima.collection.CasConsumer;
+
+/**
+ * Extends the CasConsumer interface to provide a method for getting
+ * output in XML form. A regular CasConsumer is "terminal" in the sense
+ * that its output does not get returned to object that initiated the
+ * CasConsumer. This interface allows for "non-terminal" behavior so that
+ * the output can be returned.
+ *
+ * @author Mayo Clinic
+ */
+public interface NonTerminalConsumer extends CasConsumer
+{
+ /**
+ * Gets the generated output from a CasConsumer in XML form.
+ * @return Output xml in String form.
+ */
+ public String getOutputXml();
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/NormalizedFilesInDirectoryCasConsumer.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/NormalizedFilesInDirectoryCasConsumer.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/NormalizedFilesInDirectoryCasConsumer.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/NormalizedFilesInDirectoryCasConsumer.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,85 +14,85 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.uima.core.cc;
-
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.Iterator;
-
-import org.apache.uima.cas.CAS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.jcas.JFSIndexRepository;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-
-import edu.mayo.bmi.uima.core.type.syntax.WordToken;
-import edu.mayo.bmi.uima.core.util.DocumentIDAnnotationUtil;
-
-/**
- * For each CAS a local file with the document text is written to a directory specifed by a parameter.
- * This CAS consumer does not make use of any annotation information in the cas except for the document
- * id specified the CommonTypeSystem.xml descriptor. The document id will be the name of the file written
- * for each CAS.
- *
- * This CAS consumer may be useful if you want to write the results of a collection reader and/or CAS
- * initializer to the local file system. For example, a JDBC Collection Reader may read XML documents
- * from a database and a specialized cas initializer may convert the XML to plain text. The
- * FilesInDirectoryCasConsumer can now be used to write the plain text to local plain text files.
- */
-
-public class NormalizedFilesInDirectoryCasConsumer extends CasConsumer_ImplBase {
-
- public static final String PARAM_OUTPUTDIR = "OutputDirectory";
-
- File iv_outputDirectory;
-
- public void initialize() throws ResourceInitializationException
- {
- String outputDirectoryName = (String)getConfigParameterValue(PARAM_OUTPUTDIR);
- iv_outputDirectory = new File(outputDirectoryName);
- if(!iv_outputDirectory.exists() || !iv_outputDirectory.isDirectory())
- throw new ResourceInitializationException(
- new Exception("Parameter setting 'OutputDirectory' does not point to an existing directory."));
- }
-
- public void processCas(CAS cas) throws ResourceProcessException
- {
- try
- {
- JCas jcas;
- jcas = cas.getJCas();
-
- StringBuffer normalizedText = new StringBuffer();
-
- JFSIndexRepository indexes = jcas.getJFSIndexRepository();
- Iterator tokenItr = indexes.getAnnotationIndex(WordToken.type).iterator();
- while (tokenItr.hasNext())
- {
- WordToken token = (WordToken) tokenItr.next();
- String tokenNormText = token.getCanonicalForm();
- normalizedText.append(tokenNormText+" ");
- }
- String documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
- writeToFile(documentID, normalizedText.toString());
- }
- catch(Exception e)
- {
- throw new ResourceProcessException(e);
- }
- }
-
- private void writeToFile(String documentID, String documentText) throws IOException
- {
- File outputFile = new File(iv_outputDirectory, documentID);
- outputFile.createNewFile();
- OutputStream out = new BufferedOutputStream(new FileOutputStream(outputFile));
- out.write(documentText.getBytes());
- out.flush();
- out.close();
- }
-}
+package edu.mayo.bmi.uima.core.cc;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Iterator;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.jcas.JFSIndexRepository;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+
+import edu.mayo.bmi.uima.core.type.syntax.WordToken;
+import edu.mayo.bmi.uima.core.util.DocumentIDAnnotationUtil;
+
+/**
+ * For each CAS a local file with the document text is written to a directory specifed by a parameter.
+ * This CAS consumer does not make use of any annotation information in the cas except for the document
+ * id specified the CommonTypeSystem.xml descriptor. The document id will be the name of the file written
+ * for each CAS.
+ *
+ * This CAS consumer may be useful if you want to write the results of a collection reader and/or CAS
+ * initializer to the local file system. For example, a JDBC Collection Reader may read XML documents
+ * from a database and a specialized cas initializer may convert the XML to plain text. The
+ * FilesInDirectoryCasConsumer can now be used to write the plain text to local plain text files.
+ */
+
+public class NormalizedFilesInDirectoryCasConsumer extends CasConsumer_ImplBase {
+
+ public static final String PARAM_OUTPUTDIR = "OutputDirectory";
+
+ File iv_outputDirectory;
+
+ public void initialize() throws ResourceInitializationException
+ {
+ String outputDirectoryName = (String)getConfigParameterValue(PARAM_OUTPUTDIR);
+ iv_outputDirectory = new File(outputDirectoryName);
+ if(!iv_outputDirectory.exists() || !iv_outputDirectory.isDirectory())
+ throw new ResourceInitializationException(
+ new Exception("Parameter setting 'OutputDirectory' does not point to an existing directory."));
+ }
+
+ public void processCas(CAS cas) throws ResourceProcessException
+ {
+ try
+ {
+ JCas jcas;
+ jcas = cas.getJCas();
+
+ StringBuffer normalizedText = new StringBuffer();
+
+ JFSIndexRepository indexes = jcas.getJFSIndexRepository();
+ Iterator tokenItr = indexes.getAnnotationIndex(WordToken.type).iterator();
+ while (tokenItr.hasNext())
+ {
+ WordToken token = (WordToken) tokenItr.next();
+ String tokenNormText = token.getCanonicalForm();
+ normalizedText.append(tokenNormText+" ");
+ }
+ String documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
+ writeToFile(documentID, normalizedText.toString());
+ }
+ catch(Exception e)
+ {
+ throw new ResourceProcessException(e);
+ }
+ }
+
+ private void writeToFile(String documentID, String documentText) throws IOException
+ {
+ File outputFile = new File(iv_outputDirectory, documentID);
+ outputFile.createNewFile();
+ OutputStream out = new BufferedOutputStream(new FileOutputStream(outputFile));
+ out.write(documentText.getBytes());
+ out.flush();
+ out.close();
+ }
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/TokenFreqCasConsumer.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/TokenFreqCasConsumer.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/TokenFreqCasConsumer.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/TokenFreqCasConsumer.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,146 +14,146 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.uima.core.cc;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.TreeSet;
-
-
-import org.apache.uima.cas.CAS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.jcas.JFSIndexRepository;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-import org.apache.uima.util.ProcessTrace;
-
-import edu.mayo.bmi.uima.core.type.syntax.WordToken;
-
-/**
- * This class creates a file that contains the frequencies of the word tokens found in a set
- * in a text collection. This cas consumer could potentially be used to create a frequency
- * file for any kind of annotation but only counts Token annotations at the moment.
- *
- * @see edu.mayo.bmi.uima.core.type.syntax.WordToken.java
- */
-
-public class TokenFreqCasConsumer extends CasConsumer_ImplBase
-{
- /**
- * The name of the parameter that is specifies the path of the output file in the
- * descriptor file. The value is "TokenFreqFile" and should be set in the descriptor
- * file.
- */
-
- public static final String PARAM_WORD_FREQ_FILE = "TokenFreqFile";
- File wordFreqFile;
- Map wordFreqs;
-
-/**
- * This method opens/creates the file specified by "TokenFreqFile" and initializes the
- * data structure that will keep track of frequency counts.
- * @see org.apache.uima.collection.CasConsumer_ImplBase#initialize()
- */
- public void initialize() throws ResourceInitializationException
- {
- try
- {
- String wordFreqFileName = (String) getConfigParameterValue(PARAM_WORD_FREQ_FILE);
- wordFreqFile = new File(wordFreqFileName);
- if(!wordFreqFile.exists())
- {
- wordFreqFile.createNewFile();
- }
- }
- catch(Exception ioe)
- {
- throw new ResourceInitializationException(ioe);
- }
- wordFreqs = new HashMap();
- }
-
- /**
- * Iterates through all of the WordTokenAnnotation's, gets the covered text for each annotation
- * and increments the frequency count for that text.
- *
- * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS)
- */
- public void processCas(CAS cas) throws ResourceProcessException
- {
- try
- {
- JCas jcas;
- jcas = cas.getJCas();
- JFSIndexRepository indexes = jcas.getJFSIndexRepository();
- Iterator tokenItr = indexes.getAnnotationIndex(WordToken.type).iterator();
- while (tokenItr.hasNext())
- {
- WordToken token = (WordToken) tokenItr.next();
- String text = token.getCoveredText();
- if(!wordFreqs.containsKey(text))
- {
- wordFreqs.put(text, new int[1]);
- }
- ((int[])wordFreqs.get(text))[0]++;
- }
- }
- catch(Exception exception)
- {
- throw new ResourceProcessException(exception);
- }
- }
-
- /**
- * This method sorts the frequency counts and prints out the resulting frequencies in descending
- * order to the frequency file in 'word|count' format.
- */
- public void collectionProcessComplete(ProcessTrace arg0) throws ResourceProcessException, IOException
- {
- //sortedFreqs will contain objects of type Object[] of length 2. The first object in the array
- //will hold the token and the second the frequency. We want to sort on the frequency first in
- //descending order and token in ascending order for those tokens with the same frequency.
- TreeSet sortedFreqs = new TreeSet(
- new Comparator() {
- public int compare(Object obj1, Object obj2)
- {
- Object[] tokenFreq1 = (Object[]) obj1;
- Object[] tokenFreq2 = (Object[]) obj2;
- Integer freq1 = (Integer)tokenFreq1[1];
- Integer freq2 = (Integer)tokenFreq2[1];
- if(!freq2.equals(freq1))
- return freq2.compareTo(freq1);
- String token1 = (String)tokenFreq1[0];
- String token2 = (String)tokenFreq2[0];
- return token1.compareTo(token2);
- }
- });
-
- Iterator words = wordFreqs.keySet().iterator();
- while(words.hasNext())
- {
- String word = (String) words.next();
- int freq = ((int[])(wordFreqs.get(word)))[0];
- sortedFreqs.add(new Object[] {word,new Integer(freq)});
- }
-
- PrintStream out = new PrintStream(new FileOutputStream(wordFreqFile));
- Iterator freqs = sortedFreqs.iterator();
- while(freqs.hasNext())
- {
- Object[] tokenFreq = (Object[]) freqs.next();
- String word = (String) tokenFreq[0];
- int freq = ((Integer)tokenFreq[1]).intValue();
- out.println(word+"|"+freq);
- }
- out.flush();
- out.close();
- }
-}
+package edu.mayo.bmi.uima.core.cc;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeSet;
+
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.jcas.JFSIndexRepository;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+import org.apache.uima.util.ProcessTrace;
+
+import edu.mayo.bmi.uima.core.type.syntax.WordToken;
+
+/**
+ * This class creates a file that contains the frequencies of the word tokens found in a set
+ * in a text collection. This cas consumer could potentially be used to create a frequency
+ * file for any kind of annotation but only counts Token annotations at the moment.
+ *
+ * @see edu.mayo.bmi.uima.core.type.syntax.WordToken.java
+ */
+
+public class TokenFreqCasConsumer extends CasConsumer_ImplBase
+{
+ /**
+ * The name of the parameter that is specifies the path of the output file in the
+ * descriptor file. The value is "TokenFreqFile" and should be set in the descriptor
+ * file.
+ */
+
+ public static final String PARAM_WORD_FREQ_FILE = "TokenFreqFile";
+ File wordFreqFile;
+ Map wordFreqs;
+
+/**
+ * This method opens/creates the file specified by "TokenFreqFile" and initializes the
+ * data structure that will keep track of frequency counts.
+ * @see org.apache.uima.collection.CasConsumer_ImplBase#initialize()
+ */
+ public void initialize() throws ResourceInitializationException
+ {
+ try
+ {
+ String wordFreqFileName = (String) getConfigParameterValue(PARAM_WORD_FREQ_FILE);
+ wordFreqFile = new File(wordFreqFileName);
+ if(!wordFreqFile.exists())
+ {
+ wordFreqFile.createNewFile();
+ }
+ }
+ catch(Exception ioe)
+ {
+ throw new ResourceInitializationException(ioe);
+ }
+ wordFreqs = new HashMap();
+ }
+
+ /**
+ * Iterates through all of the WordTokenAnnotation's, gets the covered text for each annotation
+ * and increments the frequency count for that text.
+ *
+ * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS)
+ */
+ public void processCas(CAS cas) throws ResourceProcessException
+ {
+ try
+ {
+ JCas jcas;
+ jcas = cas.getJCas();
+ JFSIndexRepository indexes = jcas.getJFSIndexRepository();
+ Iterator tokenItr = indexes.getAnnotationIndex(WordToken.type).iterator();
+ while (tokenItr.hasNext())
+ {
+ WordToken token = (WordToken) tokenItr.next();
+ String text = token.getCoveredText();
+ if(!wordFreqs.containsKey(text))
+ {
+ wordFreqs.put(text, new int[1]);
+ }
+ ((int[])wordFreqs.get(text))[0]++;
+ }
+ }
+ catch(Exception exception)
+ {
+ throw new ResourceProcessException(exception);
+ }
+ }
+
+ /**
+ * This method sorts the frequency counts and prints out the resulting frequencies in descending
+ * order to the frequency file in 'word|count' format.
+ */
+ public void collectionProcessComplete(ProcessTrace arg0) throws ResourceProcessException, IOException
+ {
+ //sortedFreqs will contain objects of type Object[] of length 2. The first object in the array
+ //will hold the token and the second the frequency. We want to sort on the frequency first in
+ //descending order and token in ascending order for those tokens with the same frequency.
+ TreeSet sortedFreqs = new TreeSet(
+ new Comparator() {
+ public int compare(Object obj1, Object obj2)
+ {
+ Object[] tokenFreq1 = (Object[]) obj1;
+ Object[] tokenFreq2 = (Object[]) obj2;
+ Integer freq1 = (Integer)tokenFreq1[1];
+ Integer freq2 = (Integer)tokenFreq2[1];
+ if(!freq2.equals(freq1))
+ return freq2.compareTo(freq1);
+ String token1 = (String)tokenFreq1[0];
+ String token2 = (String)tokenFreq2[0];
+ return token1.compareTo(token2);
+ }
+ });
+
+ Iterator words = wordFreqs.keySet().iterator();
+ while(words.hasNext())
+ {
+ String word = (String) words.next();
+ int freq = ((int[])(wordFreqs.get(word)))[0];
+ sortedFreqs.add(new Object[] {word,new Integer(freq)});
+ }
+
+ PrintStream out = new PrintStream(new FileOutputStream(wordFreqFile));
+ Iterator freqs = sortedFreqs.iterator();
+ while(freqs.hasNext())
+ {
+ Object[] tokenFreq = (Object[]) freqs.next();
+ String word = (String) tokenFreq[0];
+ int freq = ((Integer)tokenFreq[1]).intValue();
+ out.println(word+"|"+freq);
+ }
+ out.flush();
+ out.close();
+ }
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/TokenOffsetsCasConsumer.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/TokenOffsetsCasConsumer.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/TokenOffsetsCasConsumer.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/TokenOffsetsCasConsumer.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,98 +14,98 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.uima.core.cc;
-
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.uima.cas.CAS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.jcas.JFSIndexRepository;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-
-import edu.mayo.bmi.uima.core.type.syntax.BaseToken;
-import edu.mayo.bmi.uima.core.util.DocumentIDAnnotationUtil;
-
-/**
- * For each CAS a local file with the offsets of the BaseToken annotations is written to a directory specifed by a parameter.
- * The format of the output files is
- * 0|13
- * 17|19
- * 19|20
- * ...
- *
- * This CAS consumer does not make use of any annotation information in the
- * cas except for the document id specified the CommonTypeSystem.xml
- * descriptor and the BaseToken annotations. The document id will be the
- * name of the file written for each CAS.
- *
- * This CAS consumer was written so that token offsets could be written to
- * a file. The offsets were compared to similarly generated annotation offsets
- * from Knowtator annotations.
- */
-
-public class TokenOffsetsCasConsumer extends CasConsumer_ImplBase {
-
- public static final String PARAM_OUTPUTDIR = "OutputDirectory";
-
- File iv_outputDirectory;
-
- public void initialize() throws ResourceInitializationException
- {
- String outputDirectoryName = (String)getConfigParameterValue(PARAM_OUTPUTDIR);
- iv_outputDirectory = new File(outputDirectoryName);
- if(!iv_outputDirectory.exists() || !iv_outputDirectory.isDirectory())
- throw new ResourceInitializationException(
- new Exception("Parameter setting 'OutputDirectory' does not point to an existing directory."));
- }
-
- public void processCas(CAS cas) throws ResourceProcessException
- {
- try
- {
- JCas jcas;
- jcas = cas.getJCas();
-
- List offsets = new ArrayList();
- JFSIndexRepository indexes = jcas.getJFSIndexRepository();
- Iterator tokenItr = indexes.getAnnotationIndex(BaseToken.type).iterator();
- while (tokenItr.hasNext())
- {
- BaseToken token = (BaseToken) tokenItr.next();
- String offset = ""+token.getBegin()+"|"+token.getEnd();
- offsets.add(offset);
- }
-
- String documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
- writeToFile(documentID, offsets);
-
- }
- catch(Exception e)
- {
- throw new ResourceProcessException(e);
- }
- }
-
- private void writeToFile(String documentID, List offsets) throws IOException
- {
- File outputFile = new File(iv_outputDirectory, documentID);
- outputFile.createNewFile();
- OutputStream out = new BufferedOutputStream(new FileOutputStream(outputFile));
- for(int i=0; i<offsets.size(); i++)
- {
- String offset = (String) offsets.get(i)+"\n";
- out.write(offset.getBytes());
- }
- out.flush();
- out.close();
- }
-}
+package edu.mayo.bmi.uima.core.cc;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.jcas.JFSIndexRepository;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+
+import edu.mayo.bmi.uima.core.type.syntax.BaseToken;
+import edu.mayo.bmi.uima.core.util.DocumentIDAnnotationUtil;
+
+/**
+ * For each CAS a local file with the offsets of the BaseToken annotations is written to a directory specified by a parameter.
+ * The format of the output files is
+ * 0|13
+ * 17|19
+ * 19|20
+ * ...
+ *
+ * This CAS consumer does not make use of any annotation information in the
+ * cas except for the document id specified in the CommonTypeSystem.xml
+ * descriptor and the BaseToken annotations. The document id will be the
+ * name of the file written for each CAS.
+ *
+ * This CAS consumer was written so that token offsets could be written to
+ * a file. The offsets were compared to similarly generated annotation offsets
+ * from Knowtator annotations.
+ */
+public class TokenOffsetsCasConsumer extends CasConsumer_ImplBase {
+    /** Name of the configuration parameter that holds the output directory path. */
+    public static final String PARAM_OUTPUTDIR = "OutputDirectory";
+    /** Directory that receives one offsets file per CAS; assigned in initialize(). */
+    File iv_outputDirectory;
+
+    /**
+     * Reads the OutputDirectory parameter and verifies that it names an existing directory.
+     *
+     * @throws ResourceInitializationException if the parameter is unset or is not an existing directory
+     */
+    public void initialize() throws ResourceInitializationException {
+        String outputDirectoryName = (String) getConfigParameterValue(PARAM_OUTPUTDIR);
+        // Without this check an unset parameter escapes as a NullPointerException from new File(null).
+        if (outputDirectoryName == null)
+            throw new ResourceInitializationException(
+                    new Exception("Parameter setting 'OutputDirectory' is not set."));
+        iv_outputDirectory = new File(outputDirectoryName);
+        if (!iv_outputDirectory.exists() || !iv_outputDirectory.isDirectory())
+            throw new ResourceInitializationException(
+                    new Exception("Parameter setting 'OutputDirectory' does not point to an existing directory."));
+    }
+
+    /**
+     * Writes the "begin|end" offsets of every BaseToken in the CAS to a file named after the document id.
+     *
+     * @param cas the CAS whose BaseToken annotations are written out
+     * @throws ResourceProcessException wrapping any exception raised while reading the CAS or writing the file
+     */
+    public void processCas(CAS cas) throws ResourceProcessException {
+        try {
+            JCas jcas = cas.getJCas();
+            List offsets = new ArrayList();
+            JFSIndexRepository indexes = jcas.getJFSIndexRepository();
+            Iterator tokenItr = indexes.getAnnotationIndex(BaseToken.type).iterator();
+            while (tokenItr.hasNext()) {
+                BaseToken token = (BaseToken) tokenItr.next();
+                offsets.add(token.getBegin() + "|" + token.getEnd());
+            }
+            writeToFile(DocumentIDAnnotationUtil.getDocumentID(jcas), offsets);
+        } catch (Exception e) {
+            throw new ResourceProcessException(e);
+        }
+    }
+
+    /** Writes one offset per line to the file documentID in the output directory; the stream is closed even if a write fails. */
+    private void writeToFile(String documentID, List offsets) throws IOException {
+        OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(iv_outputDirectory, documentID)));
+        try {
+            for (int i = 0; i < offsets.size(); i++)
+                out.write(((String) offsets.get(i) + "\n").getBytes());
+            out.flush(); // flushed explicitly so a failed flush is reported rather than left to close()
+        } finally {
+            out.close();
+        }
+    }
+}