You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2011/05/27 03:10:03 UTC
svn commit: r1128130 - in /incubator/opennlp/trunk/opennlp-tools/src:
main/java/opennlp/tools/formats/ main/java/opennlp/tools/formats/ad/
test/java/opennlp/tools/formats/ test/java/opennlp/tools/formats/ad/
Author: colen
Date: Fri May 27 01:10:03 2011
New Revision: 1128130
URL: http://svn.apache.org/viewvc?rev=1128130&view=rev
Log:
OPENNLP-186 Small refactoring of Arvores Deitadas Format classes
Added:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceStream.java
- copied, changed from r1099560, incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADParagraphStream.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/PortugueseContractionUtility.java
- copied, changed from r1099267, incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ContractionUtility.java
incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/
incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADChunkSampleStreamTest.java (contents, props changed)
- copied, changed from r1128121, incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ADChunkSampleStreamTest.java
incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADNameSampleStreamTest.java
- copied, changed from r1128121, incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ADNameSampleStreamTest.java
incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADParagraphStreamTest.java
- copied, changed from r1128121, incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ADParagraphStreamTest.java
Removed:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ContractionUtility.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADParagraphStream.java
incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ADChunkSampleStreamTest.java
incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ADNameSampleStreamTest.java
incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ADParagraphStreamTest.java
Modified:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java?rev=1128130&r1=1128129&r2=1128130&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java Fri May 27 01:10:03 2011
@@ -24,10 +24,10 @@ import java.util.ArrayList;
import java.util.List;
import opennlp.tools.chunker.ChunkSample;
-import opennlp.tools.formats.ad.ADParagraphStream.Paragraph;
-import opennlp.tools.formats.ad.ADParagraphStream.ParagraphParser.Leaf;
-import opennlp.tools.formats.ad.ADParagraphStream.ParagraphParser.Node;
-import opennlp.tools.formats.ad.ADParagraphStream.ParagraphParser.TreeElement;
+import opennlp.tools.formats.ad.ADSentenceStream.Sentence;
+import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.Leaf;
+import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.Node;
+import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.TreeElement;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
@@ -57,7 +57,7 @@ import opennlp.tools.util.PlainTextByLin
*/
public class ADChunkSampleStream implements ObjectStream<ChunkSample> {
- private final ObjectStream<ADParagraphStream.Paragraph> adSentenceStream;
+ private final ObjectStream<ADSentenceStream.Sentence> adSentenceStream;
private int start = -1;
private int end = -1;
@@ -73,7 +73,7 @@ public class ADChunkSampleStream impleme
* a stream of lines as {@link String}
*/
public ADChunkSampleStream(ObjectStream<String> lineStream) {
- this.adSentenceStream = new ADParagraphStream(lineStream);
+ this.adSentenceStream = new ADSentenceStream(lineStream);
}
/**
@@ -87,7 +87,7 @@ public class ADChunkSampleStream impleme
public ADChunkSampleStream(InputStream in, String charsetName) {
try {
- this.adSentenceStream = new ADParagraphStream(new PlainTextByLineStream(
+ this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(
in, charsetName));
} catch (UnsupportedEncodingException e) {
// UTF-8 is available on all JVMs, will never happen
@@ -97,7 +97,7 @@ public class ADChunkSampleStream impleme
public ChunkSample read() throws IOException {
- Paragraph paragraph;
+ Sentence paragraph;
while ((paragraph = this.adSentenceStream.read()) != null) {
if (end > -1 && index >= end) {
@@ -201,14 +201,15 @@ public class ADChunkSampleStream impleme
private String getChunkTag(String tag) {
String phraseTag = tag.substring(tag.lastIndexOf(":") + 1);
-
- if (phraseTag.equals("np") || phraseTag.equals("ap")
- || phraseTag.equals("advp") || phraseTag.equals("vp")
- || phraseTag.equals("pp")) {
- phraseTag = phraseTag.toUpperCase();
- } else {
- phraseTag = "O";
- }
+
+ // maybe we should use only np, vp and pp, but will keep ap and advp.
+ if (phraseTag.equals("np") || phraseTag.equals("vp")
+ || phraseTag.equals("pp") || phraseTag.equals("ap")
+ || phraseTag.equals("advp")) {
+ phraseTag = phraseTag.toUpperCase();
+ } else {
+ phraseTag = "O";
+ }
return phraseTag;
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java?rev=1128130&r1=1128129&r2=1128130&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java Fri May 27 01:10:03 2011
@@ -29,11 +29,10 @@ import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import opennlp.tools.formats.ContractionUtility;
-import opennlp.tools.formats.ad.ADParagraphStream.Paragraph;
-import opennlp.tools.formats.ad.ADParagraphStream.ParagraphParser.Leaf;
-import opennlp.tools.formats.ad.ADParagraphStream.ParagraphParser.Node;
-import opennlp.tools.formats.ad.ADParagraphStream.ParagraphParser.TreeElement;
+import opennlp.tools.formats.ad.ADSentenceStream.Sentence;
+import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.Leaf;
+import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.Node;
+import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.TreeElement;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
@@ -145,7 +144,7 @@ public class ADNameSampleStream implemen
HAREM = Collections.unmodifiableMap(harem);
}
- private final ObjectStream<ADParagraphStream.Paragraph> adSentenceStream;
+ private final ObjectStream<ADSentenceStream.Sentence> adSentenceStream;
/**
* To keep the last left contraction part
@@ -161,7 +160,7 @@ public class ADNameSampleStream implemen
* a stream of lines as {@link String}
*/
public ADNameSampleStream(ObjectStream<String> lineStream) {
- this.adSentenceStream = new ADParagraphStream(lineStream);
+ this.adSentenceStream = new ADSentenceStream(lineStream);
}
/**
@@ -175,7 +174,7 @@ public class ADNameSampleStream implemen
public ADNameSampleStream(InputStream in, String charsetName) {
try {
- this.adSentenceStream = new ADParagraphStream(new PlainTextByLineStream(
+ this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(
in, charsetName));
} catch (UnsupportedEncodingException e) {
// UTF-8 is available on all JVMs, will never happen
@@ -185,7 +184,7 @@ public class ADNameSampleStream implemen
public NameSample read() throws IOException {
- Paragraph paragraph;
+ Sentence paragraph;
while ((paragraph = this.adSentenceStream.read()) != null) {
Node root = paragraph.getRoot();
List<String> sentence = new ArrayList<String>();
@@ -301,7 +300,7 @@ public class ADNameSampleStream implemen
String right = leaf.getLexeme();
if (tag != null && tag.contains("<-sam>")) {
right = leaf.getLexeme();
- String c = ContractionUtility.toContraction(leftContractionPart, right);
+ String c = PortugueseContractionUtility.toContraction(leftContractionPart, right);
if (c != null) {
sentence.add(c);
Copied: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceStream.java (from r1099560, incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADParagraphStream.java)
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceStream.java?p2=incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceStream.java&p1=incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADParagraphStream.java&r1=1099560&r2=1128130&rev=1128130&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADParagraphStream.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceStream.java Fri May 27 01:10:03 2011
@@ -26,12 +26,12 @@ import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import opennlp.tools.formats.ad.ADParagraphStream.ParagraphParser.Node;
+import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.Node;
import opennlp.tools.util.FilterObjectStream;
import opennlp.tools.util.ObjectStream;
/**
- * Stream filter which merges text lines into paragraphs, following the Arvores
+ * Stream filter which merges text lines into sentences, following the Arvores
* Deitadas syntax.
* <p>
* Information about the format:<br>
@@ -43,13 +43,14 @@ import opennlp.tools.util.ObjectStream;
* <p>
* <b>Note:</b> Do not use this class, internal use only!
*/
-public class ADParagraphStream extends
- FilterObjectStream<String, ADParagraphStream.Paragraph> {
+public class ADSentenceStream extends
+ FilterObjectStream<String, ADSentenceStream.Sentence> {
- public static class Paragraph {
+ public static class Sentence {
private String text;
private Node root;
+ private String metadata;
public String getText() {
return text;
@@ -67,15 +68,25 @@ public class ADParagraphStream extends
this.root = root;
}
+ public void setMetadata(String metadata) {
+ this.metadata = metadata;
+ }
+
+ public String getMetadata() {
+ return metadata;
+ }
+
}
/**
* Parses a sample of AD corpus. A sentence in AD corpus is represented by a
- * Tree. In this class we declare some types to represent that tree.
+ * Tree. In this class we declare some types to represent that tree. Today we get only
+ * the first alternative (A1).
*/
- public static class ParagraphParser {
+ public static class SentenceParser {
- private Pattern rootPattern = Pattern.compile("^[^:=]+:[^(\\s]+$");
+ //private Pattern rootPattern = Pattern.compile("^[^:=]+:[^(\\s]+(\\(.*?\\))?$");
+ private Pattern rootPattern = Pattern.compile("^A\\d+$");
private Pattern nodePattern = Pattern
.compile("^([=-]*)([^:=]+:[^\\(\\s]+)(\\(([^\\)]+)\\))?\\s*$");
private Pattern leafPattern = Pattern
@@ -83,53 +94,72 @@ public class ADParagraphStream extends
private Pattern bizarreLeafPattern = Pattern
.compile("^([=-]*)([^:=]+=[^\\(\\s]+)\\(([\"'].+[\"'])?\\s*([^\\)]+)?\\)\\s+(.+)");
private Pattern punctuationPattern = Pattern.compile("^(=*)(\\W+)$");
+
+ private String text,meta;
/**
- * Parse the paragraph
+ * Parse the sentence
*/
- public Paragraph parse(String paragraphString) {
+ public Sentence parse(String sentenceString, int para, boolean isTitle, boolean isBox) {
BufferedReader reader = new BufferedReader(new StringReader(
- paragraphString));
- Paragraph sentence = new Paragraph();
+ sentenceString));
+ Sentence sentence = new Sentence();
Node root = new Node();
try {
// first line is <s ...>
String line = reader.readLine();
- if (line.startsWith("<s")) {
- // should finde the source source
+
+ boolean useSameTextAndMeta = false; // to handle cases where there are diff sug of parse (&&)
+
+ // should find the line that starts with SOURCE
while (!line.startsWith("SOURCE")) {
+ if(line.equals("&&")) {
+ // same sentence again!
+ useSameTextAndMeta = true;
+ break;
+ }
line = reader.readLine();
if (line == null) {
- return new Paragraph();
+ return null;
}
}
+ if(!useSameTextAndMeta) {
+ // got source, get the metadata
+ String metaFromSource = line.substring(7);
+ line = reader.readLine();
+ // we should have the plain sentence
+ // we remove the first token
+ int start = line.indexOf(" ");
+ text = line.substring(start + 1);
+ String titleTag = "";
+ if(isTitle) titleTag = " title";
+ String boxTag = "";
+ if(isBox) boxTag = " box";
+ meta = line.substring(0, start) + " p=" + para + titleTag + boxTag + metaFromSource;
}
- line = reader.readLine();
- // we should have the plain sentence
- // we remove the first token
- int start = line.indexOf(" ");
- sentence.setText(line.substring(start + 1));
+ sentence.setText(text);
+ sentence.setMetadata(meta);
// now we look for the root node
line = reader.readLine();
while (!rootPattern.matcher(line).matches()) {
line = reader.readLine();
if (line == null) {
- return sentence;
+ return null;
}
}
// got the root. Add it to the stack
Stack<Node> nodeStack = new Stack<Node>();
// we get the complete line
- root.setSyntacticTag("ROOT");
+ root.setSyntacticTag(line);
root.setLevel(0);
nodeStack.add(root);
// now we have to take care of the lastLevel. Every time it raises, we
// will add the
// leaf to the node at the top. If it decreases, we remove the top.
- //line = reader.readLine();
- while (line.length() != 0 && line.startsWith("</s>") == false) {
+ line = reader.readLine();
+ while (line != null && line.length() != 0 && line.startsWith("</s>") == false && !line.equals("&&")) {
TreeElement element = this.getElement(line);
if(element != null) {
@@ -175,7 +205,7 @@ public class ADParagraphStream extends
}
} catch (Exception e) {
- System.err.println(paragraphString);
+ System.err.println(sentenceString);
e.printStackTrace();
return sentence;
}
@@ -395,53 +425,102 @@ public class ADParagraphStream extends
}
/**
- * The start paragraph pattern
+ * The start sentence pattern
*/
- private static final Pattern start = Pattern.compile("<s[^>]*>");
+ private static final Pattern sentStart = Pattern.compile("<s[^>]*>");
/**
- * The end paragraph pattern
+ * The end sentence pattern
+ */
+ private static final Pattern sentEnd = Pattern.compile("</s>");
+
+ /**
+ * The start title pattern
*/
- private static final Pattern end = Pattern.compile("</s>");
+ private static final Pattern titleStart = Pattern.compile("<t[^>]*>");
- private ParagraphParser parser;
+ /**
+ * The end title pattern
+ */
+ private static final Pattern titleEnd = Pattern.compile("</t>");
+
+ /**
+ * The start box (caixa) pattern
+ */
+ private static final Pattern boxStart = Pattern.compile("<caixa[^>]*>");
- public ADParagraphStream(ObjectStream<String> lineStream) {
+ /**
+ * The end box (caixa) pattern
+ */
+ private static final Pattern boxEnd = Pattern.compile("</caixa>");
+
+
+ /**
+ * The start paragraph pattern
+ */
+ private static final Pattern paraStart = Pattern.compile("<p[^>]*>");
+
+ /**
+ * The start text (ext) pattern
+ */
+ private static final Pattern textStart = Pattern.compile("<ext[^>]*>");
+
+ private SentenceParser parser;
+
+ private int paraID = 0;
+ private boolean isTitle = false;
+ private boolean isBox = false;
+
+ public ADSentenceStream(ObjectStream<String> lineStream) {
super(lineStream);
- parser = new ParagraphParser();
+ parser = new SentenceParser();
}
+
- public Paragraph read() throws IOException {
+ public Sentence read() throws IOException {
- StringBuilder paragraph = new StringBuilder();
- boolean paragraphStarted = false;
+ StringBuilder sentence = new StringBuilder();
+ boolean sentenceStarted = false;
while (true) {
String line = samples.read();
if (line != null) {
+
+ if(sentenceStarted) {
+ if (sentEnd.matcher(line).matches()) {
+ sentenceStarted = false;
+ } else {
+ sentence.append(line).append('\n');
+ }
+ } else {
+ if (sentStart.matcher(line).matches()) {
+ sentenceStarted = true;
+ } else if(paraStart.matcher(line).matches()) {
+ paraID++;
+ } else if(titleStart.matcher(line).matches()) {
+ isTitle = true;
+ } else if(titleEnd.matcher(line).matches()) {
+ isTitle = false;
+ } else if(textStart.matcher(line).matches()) {
+ paraID = 0;
+ } else if(boxStart.matcher(line).matches()) {
+ isBox = true;
+ } else if(boxEnd.matcher(line).matches()) {
+ isBox = false;
+ }
+ }
- if (start.matcher(line).matches()) {
- paragraphStarted = true;
- }
-
- if (paragraphStarted) {
- paragraph.append(line).append('\n');
- }
-
- if (end.matcher(line).matches()) {
- paragraphStarted = false;
- }
- if (!paragraphStarted && paragraph.length() > 0) {
- return parser.parse(paragraph.toString());
+ if (!sentenceStarted && sentence.length() > 0) {
+ return parser.parse(sentence.toString(), paraID, isTitle, isBox);
}
} else {
// handle end of file
- if (paragraphStarted) {
- if (paragraph.length() > 0) {
- return parser.parse(paragraph.toString());
+ if (sentenceStarted) {
+ if (sentence.length() > 0) {
+ return parser.parse(sentence.toString(), paraID, isTitle, isBox);
}
} else {
return null;
Copied: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/PortugueseContractionUtility.java (from r1099267, incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ContractionUtility.java)
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/PortugueseContractionUtility.java?p2=incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/PortugueseContractionUtility.java&p1=incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ContractionUtility.java&r1=1099267&r2=1128130&rev=1128130&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ContractionUtility.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/PortugueseContractionUtility.java Fri May 27 01:10:03 2011
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package opennlp.tools.formats;
+package opennlp.tools.formats.ad;
import java.util.Collections;
import java.util.HashMap;
@@ -32,7 +32,7 @@ import java.util.Map;
* <p>
* <b>Note:</b> Do not use this class, internal use only!
*/
-public class ContractionUtility {
+public class PortugueseContractionUtility {
private static final Map<String, String> CONTRACTIONS;
static {
Copied: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADChunkSampleStreamTest.java (from r1128121, incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ADChunkSampleStreamTest.java)
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADChunkSampleStreamTest.java?p2=incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADChunkSampleStreamTest.java&p1=incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ADChunkSampleStreamTest.java&r1=1128121&r2=1128130&rev=1128130&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ADChunkSampleStreamTest.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADChunkSampleStreamTest.java Fri May 27 01:10:03 2011
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package opennlp.tools.formats;
+package opennlp.tools.formats.ad;
import static org.junit.Assert.assertEquals;
Propchange: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADChunkSampleStreamTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Copied: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADNameSampleStreamTest.java (from r1128121, incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ADNameSampleStreamTest.java)
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADNameSampleStreamTest.java?p2=incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADNameSampleStreamTest.java&p1=incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ADNameSampleStreamTest.java&r1=1128121&r2=1128130&rev=1128130&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ADNameSampleStreamTest.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADNameSampleStreamTest.java Fri May 27 01:10:03 2011
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package opennlp.tools.formats;
+package opennlp.tools.formats.ad;
import static org.junit.Assert.assertEquals;
Copied: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADParagraphStreamTest.java (from r1128121, incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ADParagraphStreamTest.java)
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADParagraphStreamTest.java?p2=incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADParagraphStreamTest.java&p1=incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ADParagraphStreamTest.java&r1=1128121&r2=1128130&rev=1128130&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ADParagraphStreamTest.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADParagraphStreamTest.java Fri May 27 01:10:03 2011
@@ -15,14 +15,14 @@
* limitations under the License.
*/
-package opennlp.tools.formats;
+package opennlp.tools.formats.ad;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.io.InputStream;
-import opennlp.tools.formats.ad.ADParagraphStream;
+import opennlp.tools.formats.ad.ADSentenceStream;
import opennlp.tools.util.PlainTextByLineStream;
import org.junit.Test;
@@ -33,9 +33,9 @@ public class ADParagraphStreamTest {
public void testSimpleReading() throws IOException {
int count = 0;
- ADParagraphStream stream = openData();
+ ADSentenceStream stream = openData();
- ADParagraphStream.Paragraph paragraph = stream.read();
+ ADSentenceStream.Sentence paragraph = stream.read();
while(paragraph != null) {
count++;
paragraph = stream.read();
@@ -48,9 +48,9 @@ public class ADParagraphStreamTest {
public void testLeadingWithContraction() throws IOException {
int count = 0;
- ADParagraphStream stream = openData();
+ ADSentenceStream stream = openData();
- ADParagraphStream.Paragraph paragraph = stream.read();
+ ADSentenceStream.Sentence paragraph = stream.read();
while(paragraph != null) {
count++;
@@ -60,9 +60,9 @@ public class ADParagraphStreamTest {
assertEquals(4, count);
}
- private static ADParagraphStream openData() throws IOException {
+ private static ADSentenceStream openData() throws IOException {
InputStream in = ADParagraphStreamTest.class.getResourceAsStream("/opennlp/tools/formats/ad.sample");
- return new ADParagraphStream(new PlainTextByLineStream(in, "UTF-8"));
+ return new ADSentenceStream(new PlainTextByLineStream(in, "UTF-8"));
}
}