You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2016/11/09 21:11:06 UTC

[12/16] opennlp git commit: OPENNLP-622 Preparing to migrate morfologik-addon to main repository

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java b/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java
deleted file mode 100644
index 0a7ba48..0000000
--- a/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.morfologik.builder;
-
-import java.io.File;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.StandardCopyOption;
-
-import junit.framework.TestCase;
-import morfologik.stemming.DictionaryMetadata;
-import opennlp.morfologik.lemmatizer.MorfologikLemmatizer;
-
-import org.junit.Test;
-
-public class POSDictionayBuilderTest extends TestCase { // NOTE(review): JUnit 3 TestCase combined with JUnit 4 @Test - confirm the intended runner
-
-  @Test
-  public void testBuildDictionary() throws Exception {
-    
-    Path output = createMorfologikDictionary();
-
-    MorfologikLemmatizer ml = new MorfologikLemmatizer(output);
-
-    assertNotNull(ml); // 'new' never yields null, so the effective check is that construction does not throw
-  }
-  
-  public static Path createMorfologikDictionary() throws Exception { // public static so that other test classes can reuse it as a fixture
-    Path tabFilePath = File.createTempFile(
-        POSDictionayBuilderTest.class.getName(), ".txt").toPath(); // the temp file is not deleted by this method
-    Path infoFilePath = DictionaryMetadata.getExpectedMetadataLocation(tabFilePath); // target path for the .info resource copied below
-    
-    Files.copy(POSDictionayBuilderTest.class.getResourceAsStream(
-        "/dictionaryWithLemma.txt"), tabFilePath, StandardCopyOption.REPLACE_EXISTING); // REPLACE_EXISTING because createTempFile has already created the file
-    Files.copy(POSDictionayBuilderTest.class.getResourceAsStream(
-        "/dictionaryWithLemma.info"), infoFilePath, StandardCopyOption.REPLACE_EXISTING); // neither resource stream is closed here
-    
-    MorfologikDictionayBuilder builder = new MorfologikDictionayBuilder();
-    
-    return builder.build(tabFilePath); // hands back the path reported by the builder
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java b/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java
deleted file mode 100644
index 6b7525e..0000000
--- a/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java
+++ /dev/null
@@ -1,35 +0,0 @@
-package opennlp.morfologik.lemmatizer;
-
-import static org.junit.Assert.assertEquals;
-
-import java.nio.file.Path;
-
-import opennlp.morfologik.builder.POSDictionayBuilderTest;
-import opennlp.tools.lemmatizer.DictionaryLemmatizer;
-
-import org.junit.Test;
-
-public class MorfologikLemmatizerTest {
-
-  @Test
-  public void testLemmatizeInsensitive() throws Exception {
-    DictionaryLemmatizer dict = createDictionary(false); // the flag has no effect: createDictionary ignores it
-
-    assertEquals("casar", dict.lemmatize("casa", "V")); // same surface form as below; the expected lemma differs by tag
-    assertEquals("casa", dict.lemmatize("casa", "NOUN"));
-
-    assertEquals("casa", dict.lemmatize("Casa", "PROP")); // capitalized input is expected to yield the lower-case lemma
-
-  }
-
-  private MorfologikLemmatizer createDictionary(boolean caseSensitive) // NOTE: caseSensitive is never read in the body
-      throws Exception {
-
-    Path output = POSDictionayBuilderTest.createMorfologikDictionary(); // reuses the fixture from the builder test
-
-    MorfologikLemmatizer ml = new MorfologikLemmatizer(output);
-
-    return ml;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java b/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java
deleted file mode 100644
index c6c9e04..0000000
--- a/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java
+++ /dev/null
@@ -1,78 +0,0 @@
-package opennlp.morfologik.tagdict;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.util.Arrays;
-import java.util.List;
-
-import morfologik.stemming.Dictionary;
-import opennlp.morfologik.builder.POSDictionayBuilderTest;
-import opennlp.tools.postag.TagDictionary;
-
-import org.junit.Test;
-
-public class MorfologikTagDictionaryTest {
-
-  @Test
-  public void testNoLemma() throws Exception {
-    MorfologikTagDictionary dict = createDictionary(false);
-
-    List<String> tags = Arrays.asList(dict.getTags("carro"));
-    assertEquals(1, tags.size());
-    assertTrue(tags.contains("NOUN"));
-
-  }
-
-  @Test
-  public void testPOSDictionaryInsensitive() throws Exception {
-    TagDictionary dict = createDictionary(false);
-
-    List<String> tags = Arrays.asList(dict.getTags("casa"));
-    assertEquals(2, tags.size());
-    assertTrue(tags.contains("NOUN"));
-    assertTrue(tags.contains("V"));
-
-    // Case-insensitive lookup: "Casa" is expected to behave exactly like
-    // "casa", so the same two tags (NOUN and V) come back and no tag
-    // specific to the capitalized form is returned.
-    tags = Arrays.asList(dict.getTags("Casa"));
-    assertEquals(2, tags.size());
-    assertTrue(tags.contains("NOUN"));
-    assertTrue(tags.contains("V"));
-
-  }
-
-  @Test
-  public void testPOSDictionarySensitive() throws Exception {
-    TagDictionary dict = createDictionary(true);
-
-    List<String> tags = Arrays.asList(dict.getTags("casa"));
-    assertEquals(2, tags.size());
-    assertTrue(tags.contains("NOUN"));
-    assertTrue(tags.contains("V"));
-
-    // Case-sensitive lookup: "Casa" is treated as distinct from "casa",
-    // so only the single tag expected for the capitalized form, PROP,
-    // comes back.
-    tags = Arrays.asList(dict.getTags("Casa"));
-    assertEquals(1, tags.size());
-    assertTrue(tags.contains("PROP"));
-
-  }
-
-  private MorfologikTagDictionary createDictionary(boolean caseSensitive)
-      throws Exception {
-    return this.createDictionary(caseSensitive, null); // delegates to the two-argument overload with no 'constant' list
-  }
-
-  private MorfologikTagDictionary createDictionary(boolean caseSensitive,
-      List<String> constant) throws Exception { // NOTE: 'constant' is never read in the body
-
-    Dictionary dic = Dictionary.read(POSDictionayBuilderTest.createMorfologikDictionary()); // reuses the fixture from the builder test
-    MorfologikTagDictionary ml = new MorfologikTagDictionary(dic, caseSensitive);
-
-    return ml;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java b/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java
deleted file mode 100644
index 7341a02..0000000
--- a/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.morfologik.tagdict;
-
-import static org.junit.Assert.*;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.nio.file.Path;
-
-import opennlp.morfologik.builder.POSDictionayBuilderTest;
-import opennlp.tools.postag.POSModel;
-import opennlp.tools.postag.POSSample;
-import opennlp.tools.postag.POSTaggerFactory;
-import opennlp.tools.postag.POSTaggerME;
-import opennlp.tools.postag.TagDictionary;
-import opennlp.tools.postag.WordTagSampleStream;
-import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.TrainingParameters;
-import opennlp.tools.util.model.ModelType;
-
-import org.junit.Test;
-
-/**
- * Tests training and serializing a {@link POSModel} that uses a {@link MorfologikPOSTaggerFactory}.
- */
-public class POSTaggerFactoryTest {
-
-  private static ObjectStream<POSSample> createSampleStream()
-      throws IOException {
-    InputStream in = POSTaggerFactoryTest.class.getClassLoader()
-        .getResourceAsStream("AnnotatedSentences.txt");
-
-    return new WordTagSampleStream((new InputStreamReader(in))); // no charset given, so the platform default is used
-  }
-
-  static POSModel trainPOSModel(ModelType type, POSTaggerFactory factory) // NOTE: 'type' is never read in the body
-      throws IOException {
-    return POSTaggerME.train("en", createSampleStream(),
-        TrainingParameters.defaultParams(), factory);
-  }
-
-  @Test
-  public void testPOSTaggerWithCustomFactory() throws Exception {
-
-    Path dictionary = POSDictionayBuilderTest.createMorfologikDictionary(); // reuses the fixture from the builder test
-    POSTaggerFactory inFactory = new MorfologikPOSTaggerFactory();
-    TagDictionary inDict = inFactory.createTagDictionary(dictionary.toFile());
-    inFactory.setTagDictionary(inDict);
-
-    POSModel posModel = trainPOSModel(ModelType.MAXENT, inFactory);
-
-    POSTaggerFactory factory = posModel.getFactory();
-    assertTrue(factory.getTagDictionary() instanceof MorfologikTagDictionary); // first check: straight after training
-
-    factory = null; // drop the reference before re-reading the factory from the serialized model
-    
-    ByteArrayOutputStream out = new ByteArrayOutputStream();
-    posModel.serialize(out);
-    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); // in-memory round trip, no file involved
-
-    POSModel fromSerialized = new POSModel(in);
-
-    factory = fromSerialized.getFactory();
-    assertTrue(factory.getTagDictionary() instanceof MorfologikTagDictionary); // second check: after the serialization round trip
-    
-    assertEquals(2, factory.getTagDictionary().getTags("casa").length); // the restored dictionary must still answer lookups
-  }
-
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/src/test/resources/AnnotatedSentences.txt
----------------------------------------------------------------------
diff --git a/src/test/resources/AnnotatedSentences.txt b/src/test/resources/AnnotatedSentences.txt
deleted file mode 100644
index b40be87..0000000
--- a/src/test/resources/AnnotatedSentences.txt
+++ /dev/null
@@ -1,136 +0,0 @@
-Last_JJ September_NNP ,_, I_PRP tried_VBD to_TO find_VB out_RP the_DT address_NN of_IN an_DT old_JJ school_NN friend_NN whom_WP I_PRP had_VBD not_RB seen_VBN for_IN 15_CD years_NNS ._.
-I_PRP just_RB knew_VBD his_PRP$ name_NN ,_, Alan_NNP McKennedy_NNP ,_, and_CC I_PRP 'd_MD heard_VBD the_DT rumour_NN that_IN he_PRP 'd_MD moved_VBD to_TO Scotland_NNP ,_, the_DT country_NN of_IN his_PRP$ ancestors_NNS ._.
-So_IN I_PRP called_VBD Julie_NNP ,_, a_DT friend_NN who's_WDT still_RB in_IN contact_NN with_IN him_PRP ._.
-She_PRP told_VBD me_PRP that_IN he_PRP lived_VBD in_IN 23213_CD Edinburgh_NNP ,_, Worcesterstreet_NNP 12_CD ._.
-I_PRP wrote_VBD him_PRP a_DT letter_NN right_RB away_RB and_CC he_PRP answered_VBD soon_RB ,_, sounding_VBG very_RB happy_JJ and_CC delighted_JJ ._.
-
-Last_JJ year_NN ,_, I_PRP wanted_VBD to_TO write_VB a_DT letter_NN to_TO my_PRP$ grandaunt_NN ._.
-Her_PRP$ 86_CD th_NN birthday_NN was_VBD on_IN October_NNP 6_CD ,_, and_CC I_PRP no_RB longer_RB wanted_VBD to_TO be_VB hesitant_JJ to_TO get_VB in_IN touch_NN with_IN her_PRP ._.
-I_PRP did_VBD not_RB know_VB her_PRP face-to-face_RB ,_, and_CC so_RB it_PRP was_VBD not_RB easy_JJ for_IN me_PRP to_TO find_VB out_RP her_PRP$ address_NN ._.
-As_IN she_PRP had_VBD two_CD apartments_NNS in_IN different_JJ countries_NNS ,_, I_PRP decided_VBD to_TO write_VB to_TO both_DT ._.
-The_DT first_JJ was_VBD in_IN 12424_CD Paris_NNP in_IN Rue-de-Grandes-Illusions_NNP 5_CD ._.
-But_CC Marie_NNP Clara_NNP ,_, as_IN my_PRP$ aunt_NN is_VBZ called_VBN ,_, prefered_VBN her_PRP$ apartment_NN in_IN Berlin_NNP ._.
-It_PRP 's_VBZ postcode_JJ is_VBZ 30202_CD ._.
-She_PRP lived_VBD there_RB ,_, in_IN beautiful_JJ Kaiserstraße_NNP 13_CD ,_, particulary_NN in_IN summer_NN ._.
-
-Hi_UH my_PRP$ name_NN is_VBZ Stefanie_NNP Schmidt_NNP ,_, how_WRB much_RB is_VBZ a_DT taxi_NN from_IN Ostbahnhof_NNP to_TO Hauptbahnhof_NNP ?_.
-About_IN 10_CD Euro_NNP ,_, I_PRP reckon_VBP ._.
-That_DT sounds_VBZ good_JJ ._.
-So_RB please_VB call_VB a_DT driver_NN to_TO Leonardstraße_NNP 112_CD ,_, near_IN the_DT Ostbahnhof_NNP in_IN 56473_CD Hamburg_NNP ._.
-I_PRP 'd_MD like_VB to_TO be_VB at_IN Silberhornstraße_NNP 12_CD as_RB soon_RB as_IN possible_JJ ._.
-Thank_VB you_PRP very_RB much_RB !_.
-
-Hi_NNP Mike_NNP ,_, it_PRP 's_VBZ Stefanie_NNP Schmidt_NNP ._.
-I_PRP 'm_VBP in_IN Nürnberg_NNP at_IN the_DT moment_NN and_CC I_PRP 've_VBP got_VBD the_DT problem_NN that_IN my_PRP$ bike_NN has_VBZ broken_VBN ._.
-Could_MD you_PRP please_VB pick_VB me_PRP up_RP from_IN Seidlstraße_NNP 56_CD ,_, I_PRP 'm_VBP in_IN the_DT Café_NNP "Mondnacht"_NNP at_IN the_DT moment_NN ._.
-Please_VB hurry_VB up_RB ,_, I_PRP need_VBP to_TO be_VB back_RB in_IN Ulm_NNP at_IN 8_CD p.m._NN !_.
-
-My_PRP$ husband_NN George_NNP and_CC me_PRP recently_RB celebrated_VBD our_PRP$ 10_CD th_JJ wedding_NN anniversary_NN ._.
-We_PRP got_VBD married_VBN on_IN March_NNP 11_CD ,_, 1995_CD ._.
-Therefore_RB ,_, we_PRP found_VBD a_DT photo_NN album_NN with_IN pictures_NNS of_IN our_PRP$ first_JJ own_JJ apartment_NN ,_, which_WDT was_VBD in_IN 81234_CD Munich_NNP ._.
-As_IN a_DT young_JJ married_JJ couple_NN ,_, we_PRP did_VBD not_RB have_VB enough_JJ money_NN to_TO afford_VB a_DT bigger_JJR lodge_NN than_IN this_DT one_CD in_IN Blumenweg_NNP 1_CD ._.
-But_CC only_RB five_CD years_NNS later_RB ,_, my_PRP$ husband_NN was_VBD offered_VBN a_DT well-payed_JJ job_NN in_IN 17818_CD Hamburg_NNP ,_, so_IN we_PRP moved_VBD there_RB ._.
-Since_IN then_RB ,_, our_PRP$ guests_NNS have_VBP to_TO ring_VB at_IN Veilchenstraße_NNP 11_CD if_IN they_PRP want_VBP to_TO visit_VB us_PRP ,_, Luise_NNP and_CC George_NNP Bauer_NNP ._.
-
-I_PRP read_VBD your_PRP$ help-wanted_JJ ad_NN with_IN great_JJ attention_NN ._.
-I_PRP 'm_VBP a_DT student_NN of_IN informatics_NNS ,_, 6th_JJ semester,_NN and_CC I_PRP 'm_VBP very_RB interested_VBN in_IN your_PRP$ part-time_JJ job_NN offer_NN ._.
-I_PRP have_VBP a_DT competent_JJ knowledge_NN of_IN programming_NN and_CC foreign_JJ languages_NNS ,_, like_IN French_JJ and_CC Italian_JJ ._.
-I_PRP 'm_VBP looking_VBG forward_RB to_TO your_PRP$ reply_NN ._.
-
-Alisa_NNP Fernandes_NNP ,_, a_DT tourist_NN from_IN Spain_NNP ,_, went_VBD to_TO the_DT reception_NN desk_NN of_IN the_DT famous_JJ Highfly-Hotel_NNP in_IN 30303_CD Berlin_NNP ._.
-As_IN she_PRP felt_VBD quite_RB homesick_JJ ,_, she_PRP asked_VBD the_DT staff_NN if_IN they_PRP knew_VBD a_DT good_JJ Spanish_JJ restaurant_NN in_IN Berlin_NNP ._.
-The_DT concierge_NN told_VBD her_PRP to_TO go_VB to_TO the_DT "Tapasbar"_NN in_IN Chesterstr._NNP 2_CD ._.
-Alisa_NNP appreciated_VBD the_DT hint_NN and_CC enjoyed_VBD a_DT delicious_JJ traditional_JJ meal_NN ._.
-
-An_DT old_JJ friend_NN from_IN France_NNP is_VBZ currently_RB travelling_VBG around_IN Europe_NNP ._.
-Yesterday_NN ,_, she_PRP arrived_VBD in_IN Berlin_NNP and_CC we_PRP met_VBD up_RP spontaneously_RB ._.
-She_PRP wanted_VBD me_PRP to_TO show_VB her_PRP some_DT famous_JJ sights_NNS ,_, like_IN the_DT Brandenburger_NNP Tor_NNP and_CC the_DT Reichstag_NNP ._.
-But_CC it_PRP was_VBD not_RB easy_JJ to_TO meet_VB up_RP in_IN the_DT city_NN because_IN she_PRP hardly_RB knows_VBZ any_DT streetname_NN or_CC building_NN ._.
-So_IN I_PRP proposed_VBD to_TO meet_VB at_IN a_DT quite_RB local_JJ point:_NN the_DT café_NN "Daily's"_NN in_IN Unter-den-Linden_NNP 18,_CD 30291_CD Berlin_NNP ._.
-It_PRP is_VBZ five_CD minutes_NNS away_RB from_IN the_DT underground_JJ station_NN "Westbad"_NN ._.
-She_PRP found_VBD it_PRP instantly_RB and_CC we_PRP spent_VBD a_DT great_JJ day_NN in_IN the_DT capital_NN ._.
-
-Where_WRB did_VBD you_PRP get_VB those_DT great_JJ shoes_NNS ?_.
-They_PRP look_VBP amazing_JJ ,_, I_PRP love_VBP the_DT colour_NN ._.
-Are_VBP they_PRP made_VBN of_IN leather_NN ?_.
-No,_NNP that_DT 's_VBZ faked_VBN ._.
-But_CC anyway_RB ,_, I_PRP like_VBP them_PRP too_RB ._.
-I_PRP got_VBD them_PRP from_IN Hamburg._NNP
-Do_VBP not_RB you_PRP know_VB the_DT famous_JJ shop_NN in_IN Veilchenstraße_NNP ?_.
-It_PRP 's_VBZ called_VBN "Twentytwo"_NNP ._.
-I_PRP 've_VBP never_RB heard_VBN of_IN that_DT before_RB ._.
-Could_MD you_PRP give_VB me_PRP the_DT complete_JJ address_NN ?_.
-Sure_JJ ,_, it_PRP 's_VBZ in_IN Veilchenstraße_NNP 12_CD ,_, in_IN 78181_CD Hamburg_NNP ._.
-I_PRP deem_VBP it_PRP best_RB to_TO write_VB a_DT letter_NN to_TO the_DT owner_NN if_IN the_DT shoes_NNS are_VBP still_RB available_JJ ._.
-His_PRP$ name_NN is_VBZ Gerhard_NNP Fritsch_NNP ._.
-
-Hi_UH ,_, am_VBP I_PRP talking_VBG to_TO the_DT inquiries_NNS ?_.
-My_PRP$ name_NN is_VBZ Mike_NNP Sander_NNP and_CC I_PRP 'd_MD like_VB to_TO know_VB if_IN it_PRP is_VBZ possible_JJ to_TO get_VB information_NN about_IN an_DT address_NN if_IN I_PRP merely_RB know_VBP the_DT name_NN and_CC the_DT phone_NN number_NN of_IN a_DT person_NN !_.
-How_WRB is_VBZ he_PRP or_CC she_PRP called_VBD ?_.
-His_PRP$ name_NN is_VBZ Stefan_NNP Miller_NNP and_CC his_PRP$ number_NN is_VBZ the_DT 030/827234_CD ._.
-I'll_NNP have_VBP a_DT look_NN in_IN the_DT computer..._NN
-I_PRP found_VBD a_DT Stefan_NNP Miller_NNP who_WP lives_VBZ in_IN Leipzig._NNP
-Is_VBZ that_DT right_NN ?_.
-Yes_UH ,_, it_PRP definitely_RB is_VBZ ._.
-So_RB Stefan_NNP Miller_NNP lives_VBZ in_IN Heinrich-Heine-Straße_NNP 112_CD ,_, in_IN 20193_CD Leipzig_NNP ._.
-Thank_VB you_PRP very_RB much_RB for_IN the_DT information_NN ._.
-Bye_NNP !_.
-
-On_IN July_NNP 14_CD ,_, the_DT father_NN of_IN a_DT family_NN got_VBD painfully_RB injured_VBN after_IN he_PRP had_VBD tried_VBN to_TO start_VB a_DT barbecue_NN ._.
-The_DT flaring_VBG flames_NNS burnt_VBP instantly_RB through_IN his_PRP$ jacket_NN ,_, which_WDT he_PRP managed_VBD to_TO pull_VB off_RP last-minute_JJ ._.
-Although_IN the_DT wounds_NNS were_VBD n't_RB life-threatening_JJ ,_, it_PRP was_VBD urgent_JJ to_TO bring_VB him_PRP directly_RB into_IN ambulance_NN ._.
-But_CC the_DT only_JJ hospital_NN that_WDT had_VBD opened_VBN that_IN Sunday_NNP was_VBD the_DT Paracelsus_NNP Hospital_NNP in_IN 83939_CD Weilheim_NNP ,_, which_WDT was_VBD 2_CD hours_NNS away_RB ._.
-Convulsed_JJ with_IN pain_NN ,_, the_DT man_NN finally_RB arrived_VBD in_IN Stifterstraße_NNP 15_CD ,_, where_WRB the_DT personal_NN immediately_RB took_VBD care_NN of_IN him_PRP ._.
-
-Last_JJ year_NN ,_, I_PRP worked_VBD as_IN a_DT delivery_NN boy_NN for_IN a_DT small_JJ local_JJ magazine_NN ._.
-I_PRP worked_VBD in_IN the_DT area_NN of_IN 83454_CD Ottobrunn_NNP ._.
-I_PRP had_VBD a_DT list_NN with_IN the_DT home_NN addresses_NNS of_IN our_PRP$ costumers_NNS whom_WP I_PRP brought_VBD their_PRP$ papers_NNS once_RB a_DT week_NN ._.
-An_DT elderly_JJ lady_NN ,_, who_WP was_VBD called_VBN Elenor_NNP Meier_NNP ,_, lived_VBD in_IN Gärtnerweg_NNP 6_CD ,_, and_CC I_PRP always_RB drove_VBD there_RB first_RB ,_, because_IN I_PRP liked_VBD her_PRP the_DT most_JJS ._.
-Afterwards_RB ,_, I_PRP went_VBD to_TO a_DT student_NN ,_, Gina_NNP Schneider_NNP ,_, who_WP lived_VBD still_RB in_IN her_PRP$ parent's_NNS house_NN in_IN Gärtnerweg_NNP 25_CD ._.
-The_DT last_JJ in_IN line_NN was_VBD the_DT retired_JJ teacher_NN Bruno_NNP Schulz_NNP in_IN Dramenstraße_NNP 15_CD ._.
-He_PRP was_VBD friendly_JJ enough_RB to_TO tip_VB sometimes_RB ._.
-
-Our_PRP$ business_NN company_NN was_VBD founded_VBN in_IN 1912_CD by_IN the_DT singer_NN and_CC entertainer_NN Michel_NNP Seile_NNP ._.
-He_PRP opened_VBD the_DT first_JJ agency_NN in_IN Erding_NNP ,_, a_DT small_JJ town_NN near_IN Munich_NNP ._.
-Now_RB ,_, more_JJR than_IN 90_CD years_NNS of_IN turbulent_JJ ups_NNS and_CC downs_NNS later_RB ,_, we_PRP finally_RB decided_VBD to_TO situate_VB our_PRP$ company_NN in_IN a_DT more_JJR central_JJ and_CC frequented_JJ area_NN ._.
-Last_JJ year_NN ,_, we_PRP moved_VBD into_IN an_DT empty_JJ factory_NN building_NN in_IN 30303_CD Berlin_NNP ._.
-It_PRP is_VBZ located_VBN in_IN Barmerstr._NNP 34_CD ._.
-
-When_WRB George_NNP Miller_NNP ,_, a_DT tourist_NN from_IN England_NNP ,_, came_VBD to_TO Munich_NNP ,_, he_PRP had_VBD no_DT idea_NN how_WRB to_TO read_VB the_DT city_NN maps_NNS ._.
-He_PRP depended_VBD completely_RB on_IN the_DT help_NN and_CC information_NN of_IN German_JJ pedestrians_NNS ._.
-One_CD day_NN ,_, he_PRP simply_RB could_MD not_RB find_VB the_DT famous_JJ Lenbachhaus_NNP ._.
-So_RB he_PRP asked_VBD a_DT young_JJ woman_NN for_IN help_NN ._.
-She_PRP pointed_VBD at_IN a_DT street_NN sign_NN and_CC explained_VBD to_TO him_PRP that_IN he_PRP 'd_MD find_VB the_DT Lenbachhaus_NNP in_IN Luisenstraße_NNP 33_CD ,_, which_WDT is_VBZ in_IN 80333_CD Munich_NNP ._.
-Miller_NNP was_VBD very_RB grateful_JJ and_CC could_MD finally_RB enjoy_VB the_DT exhibition_NN ._.
-
-On_IN March_NNP 15_CD ,_, there_EX was_VBD an_DT accident_NN near_IN Munich_NNP ._.
-The_DT driver_NN got_VBD badly_RB injured_VBN ._.
-Driving_VBG alone_RB not_RB far_RB from_IN her_PRP$ home_NN ,_, the_DT middle-aged_JJ woman_NN crashed_VBD at_IN high_JJ speed_NN into_IN a_DT tree_NN ._.
-A_DT resident_NN ,_, who_WP lives_VBZ near_IN the_DT street_NN where_WRB the_DT accident_NN took_VBD place_NN ,_, called_VBN instantly_RB the_DT police_NN ._.
-He_PRP reported_VBD what_WP had_VBD happened_VBN and_CC gave_VBD his_PRP$ name_NN and_CC address_NN to_TO the_DT officer_NN ._.
-He_PRP 's_VBZ called_VBN Peter_NNP Schubert_NNP and_CC he_PRP lives_VBZ at_IN Max-Löw-Straße_NNP 13_CD in_IN 84630_CD Gauting_NNP ._.
-The_DT police_NN arrived_VBD ten_CD minutes_NNS later_RB and_CC brought_VBD the_DT woman_NN into_IN hospital_NN ._.
-Although_IN she_PRP had_VBD multiple_JJ trauma_NN ,_, she_PRP 's_VBZ out_IN of_IN mortal_JJ danger_NN ._.
-
-Hi_NNP ,_, how_WRB are_VBP you_PRP ?_.
-Are_VBP nt't_RB you_PRP a_DT friend_NN of_IN Natalie_NNP ?_.
-Yeah_UH for_IN sure_JJ ._.
-How_WRB did_VBD you_PRP know_VB that_DT ?_.
-I_PRP saw_VBD you_PRP sitting_VBG next_JJ to_TO her_PRP at_IN uni_JJ ._.
-Yeah_NNP she_PRP 's_VBZ my_PRP$ best_JJS friend_NN ._.
-Are_VBP you_PRP going_VBG to_TO her_PRP party_NN next_JJ friday_NN ?_.
-Oh_UH yes_UH ,_, I_PRP 'd_MD really_RB like_VB to_TO ._.
-But_CC in_IN fact_NN I_PRP do_VBP n't_RB know_VB yet_RB where_WRB it_PRP takes_VBZ place_NN ._.
-I_PRP can_MD tell_VB you_PRP :_: ring_NN at_IN Baumann,_NNP Meisenstraße_NNP 5_CD ,_, in_IN 81737_CD Munich_NNP ._.
-The_DT party_NN starts_VBZ at_IN 9_CD p.m._NN ._.
-I_PRP hope_VBP you_PRP 'll_MD find_VB it_PRP ._.
-Thank_VB you_PRP very_RB much_RB ,_, see_VBP you_PRP next_JJ friday_NN !_.
-
-My_PRP$ name_NN is_VBZ Michael_NNP Hinterhofer_NNP ._.
-When_WRB I_PRP was_VBD 21_CD ,_, I_PRP moved_VBD out_RP from_IN my_PRP$ parents_NNS home_NN into_IN my_PRP$ first_JJ own_JJ appartment_NN in_IN order_NN to_TO study_VB in_IN a_DT bigger_JJR city_NN ._.
-My_PRP$ new_JJ home_NN was_VBD in_IN Lilienstraße_NNP 1_CD in_IN 25334_CD Hamburg_NNP ._.
-But_CC I_PRP realized_VBD quickly_RB that_IN life_NN in_IN a_DT metropolis_NN was_VBD n't_RB relaxed_VBN enough_RB for_IN me_PRP ._.
-So_IN I_PRP decided_VBD to_TO move_VB into_IN a_DT smaller_JJR town_NN ._.
-Now_RB I_PRP 'm_VBP a_DT tenant_NN with_IN an_DT elderly_JJ widow_NN ._.
-We_PRP live_VBP in_IN Bürgerstraße_NNP 2_CD in_IN 63737_CD Heidelberg_NNP ._.
-I_PRP really_RB like_IN the_DT smalltown_JJ flair_NN and_CC my_PRP$ studies_NNS at_IN Heidelberg_NNP 's_POS notable_JJ university_NN ._.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/src/test/resources/dictionaryWithLemma.info
----------------------------------------------------------------------
diff --git a/src/test/resources/dictionaryWithLemma.info b/src/test/resources/dictionaryWithLemma.info
deleted file mode 100644
index ad5fe8d..0000000
--- a/src/test/resources/dictionaryWithLemma.info
+++ /dev/null
@@ -1,15 +0,0 @@
-#
-# REQUIRED PROPERTIES
-#
-
-# Column (lemma, inflected, tag) separator. This must be a single byte in the target encoding.
-fsa.dict.separator=,
-
-# The charset in which the input is encoded. UTF-8 is strongly recommended.
-fsa.dict.encoding=UTF-8
-
-# The type of lemma-inflected form encoding compression that precedes automaton
-# construction. Allowed values: [suffix, infix, prefix, none].
-# Details are in Daciuk's paper and in the code. 
-# Leave at 'prefix' if not sure.
-fsa.dict.encoder=prefix
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/src/test/resources/dictionaryWithLemma.txt
----------------------------------------------------------------------
diff --git a/src/test/resources/dictionaryWithLemma.txt b/src/test/resources/dictionaryWithLemma.txt
deleted file mode 100644
index 09d39e3..0000000
--- a/src/test/resources/dictionaryWithLemma.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-casa,casa,NOUN
-casar,casa,V
-casar,casar,V-INF
-Casa,Casa,PROP
-casa,casinha,NOUN
-casa,casona,NOUN
-menino,menina,NOUN
-menino,menino,NOUN
-menino,meninão,NOUN
-menino,menininho,NOUN
-carro,carro,NOUN
\ No newline at end of file