You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by bg...@apache.org on 2016/11/16 09:11:47 UTC

[51/51] [partial] opennlp-sandbox git commit: merge from bgalitsky's own git repo

merge from bgalitsky's own git repo


Project: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/commit/1f97041b
Tree: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/tree/1f97041b
Diff: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/diff/1f97041b

Branch: refs/heads/master
Commit: 1f97041ba33317481b0f2083f7316a312b8d8b52
Parents: 9aa270c
Author: Boris Galitsky <bg...@hotmail.com>
Authored: Wed Nov 16 10:10:18 2016 -0800
Committer: Boris Galitsky <bg...@hotmail.com>
Committed: Wed Nov 16 10:10:18 2016 -0800

----------------------------------------------------------------------
 opennlp-similarity/README.md                    |   157 +
 opennlp-similarity/README.txt                   |   138 +
 .../lib/edu.mit.jverbnet-1.2.0.jar              |   Bin 0 -> 168444 bytes
 opennlp-similarity/lib/ejml-0.23.jar            |   Bin 0 -> 211938 bytes
 opennlp-similarity/lib/javax.json.jar           |   Bin 0 -> 85147 bytes
 opennlp-similarity/lib/joda-time.jar            |   Bin 0 -> 570478 bytes
 opennlp-similarity/lib/jollyday.jar             |   Bin 0 -> 200993 bytes
 opennlp-similarity/lib/xom.jar                  |   Bin 0 -> 313253 bytes
 opennlp-similarity/src/main/java/com.zip        |   Bin 0 -> 47523 bytes
 .../tools/apps/object_dedup/DedupResult.java    |    31 +
 .../object_dedup/SimilarityAccessorBase.java    |   739 +
 .../main/java/opennlp/tools/doc_classifier.zip  |   Bin 0 -> 15046 bytes
 .../ClassifierTrainingSetIndexer.java           |   266 +
 .../tools/doc_classifier/DocClassifier.java     |   306 +
 ...assifierTrainingSetMultilingualExtender.java |   284 +
 .../DocClassifierTrainingSetVerifier.java       |   163 +
 .../enron_email_recognizer/EmailNormalizer.java |   100 +
 .../EmailTrainingSetFormer.java                 |    67 +
 .../opennlp/tools/fca/BasicLevelMetrics.java    |   676 +
 .../java/opennlp/tools/fca/ConceptLattice.java  |   298 +
 .../java/opennlp/tools/fca/FcaConverter.java    |    72 +
 .../main/java/opennlp/tools/fca/FcaReader.java  |    95 +
 .../main/java/opennlp/tools/fca/FcaWriter.java  |   131 +
 .../java/opennlp/tools/fca/FormalConcept.java   |   152 +
 .../main/java/opennlp/tools/fca/Measures.java   |   147 +
 .../opennlp/tools/fca/RandomNoiseGenerator.java |    63 +
 .../opennlp/tools/jsmlearning/JSMDecision.java  |    78 +
 .../jsmlearning/JSMLearnerOnLatticeBase.java    |   338 +
 .../JSMLearnerOnLatticeWithAbduction.java       |    88 +
 .../JSMLearnerOnLatticeWithDeduction.java       |   243 +
 .../parse_thicket/ParseCorefBuilderWithNER.java |   156 +
 .../tools/parse_thicket/VerbNetProcessor.java   |   267 +
 .../external_rst/ExternalRSTImporter.java       |   264 +
 .../external_rst/MatcherExternalRST.java        |    96 +
 .../PT2ThicketPhraseBuilderExtrnlRST.java       |   115 +
 .../ParseCorefBuilderWithNERandRST.java         |   268 +
 .../ParseThicketWithDiscourseTree.java          |   284 +
 .../parse_thicket/external_rst/RstNode.java     |   119 +
 .../kernel_interface/BracesProcessor.java       |   197 +
 .../DescriptiveParagraphFromDocExtractor.java   |   168 +
 .../TreeKernelBasedClassifier.java              |   272 +
 .../TreeKernelBasedClassifierMultiplePara.java  |   200 +
 ...reeKernelBasedClassifierOfDiscourseTree.java |   326 +
 .../style_classif/TSNE_ImporterProcessor.java   |    94 +
 .../matching/FrameQueryBasedIExtractor.java     |   153 +
 .../matching/GeneralizationResult.java          |    66 +
 .../matching/LemmaGeneralizer.java              |    94 +
 .../tools/parse_thicket/matching/MyMatcher.java |   126 +
 .../matching/NERPhraseGeneralizer.java          |   271 +
 .../matching/ParseTreeNodeGeneralizer.java      |    68 +
 .../matching/PartOfSpeechGeneralizer.java       |    77 +
 .../matching/PersonalInformationExtractor.java  |   159 +
 .../matching/PhraseGeneralizer.java             |   268 +
 .../matching/PhraseGroupGeneralizer.java        |   128 +
 .../opinion_processor/AbstractEngineRunner.java |   120 +
 .../DefaultSentimentProcessor.java              |   523 +
 .../EntityExtractionResult.java                 |   158 +
 .../ExpressionSentimentAnalyzer.java            |    56 +
 .../LinguisticPhraseManager.java                |   591 +
 .../opinion_processor/NamedEntityExtractor.java |   185 +
 .../opinion_processor/PersonExtractor.java      |    96 +
 .../SentencePhraseGivenAWordGetter.java         |   107 +
 .../SentimentCoreAnnotations.java               |    41 +
 .../opinion_processor/StopList.java             |   401 +
 .../TopicAsOpinionMinerRunner.java              |   117 +
 .../opinion_processor/TopicPhraseExtractor.java |   212 +
 .../opinion_processor/TwitterEngineRunner.java  |   157 +
 .../opinion_processor/TwitterFilter.java        |   165 +
 .../opinion_processor/YouTubeMiner.java         |    97 +
 .../opinion_processor/YouTubeMinerResult.java   |    38 +
 .../LinguisticPatternStructure.java             |   192 +
 .../PatternStructureWriter.java                 |    59 +
 ...KernelBasedRecognizerOfRequest_Response.java |    94 +
 .../YahooAnswersTrainingSetCreator.java         |   118 +
 .../apps/BingRelatedSpellingQueryRunner.java    |   105 +
 .../apps/GoogleAutoCompleteQueryRunner.java     |   124 +
 .../similarity/apps/SentenceTranslate.java.txt  |   212 +
 .../tools/similarity/apps/solr/Comment.java     |   118 +
 .../tools/similarity/apps/solr/CommentsRel.java |   139 +
 .../apps/solr/QueryExpansionRequestHandler.java |   105 +
 ...earchResultsReRankerStanfRequestHandler.java |   306 +
 .../similarity/apps/solr/WordDocBuilder.java    |   270 +
 .../apps/solr/WordDocBuilderEndNotes.java       |   223 +
 .../WordDocBuilderSingleImageSearchCall.java    |   174 +
 .../apps/taxo_builder/CsvAdapter.java           |    74 +
 .../taxo_builder/DomainTaxonomyExtender.java    |   230 +
 .../java/opennlp/tools/stemmer/PStemmer.java    |   521 +
 .../tools/word2vec/W2VDistanceMeasurer.java     |   124 +
 .../test/java/opennlp/tools/fca/FCATest.java    |    79 +
 .../external_rst/ExternalRSTImporterTest.java   |    47 +
 .../JSMLearnerOnLatticeTest.java                |   317 +
 .../pattern_structure/PatternStructureTest.java |   189 +
 .../pattern_structure/PhraseTest.java           |   171 +
 .../test/resources/external_rst/resInput.txt    |    62 +
 .../src/test/resources/fca/sports.cxt           |    55 +
 .../src/test/resources/new_vn.zip               |   Bin 0 -> 389230 bytes
 .../resources/style_recognizer/all-tsne2.txt    |  1051 ++
 .../txt/Bib1/27Bib1Amos_Prophecy_12_EN.txt.txt  |     2 +
 .../txt/Bib1/28Bib1Amos_Prophecy_89_EN.txt.txt  |     2 +
 .../txt/Bib1/29Bib1Ezra_History_12_EN.txt.txt   |     2 +
 .../txt/Bib1/30Bib1Joel_Prophecy_12_EN.txt.txt  |     2 +
 .../txt/Bib1/31Bib1Ruth_History_12_EN.txt.txt   |     2 +
 .../txt/Bib2/32Bib2John_Gospel_12_EN.txt.txt    |     2 +
 .../txt/Bib2/33Bib2Jude_Epistle_1_EN.txt.txt    |     2 +
 .../txt/Bib2/34Bib2Luke_Acts_12_EN.txt.txt      |     2 +
 .../txt/Bib2/35Bib2Luke_Acts_2728_EN.txt.txt    |     2 +
 .../txt/Bib2/36Bib2Luke_Gospel_12_EN.txt.txt    |     2 +
 .../txt/Bib2/37Bib2Mark_Gospel_12_EN.txt.txt    |     2 +
 .../txt/Corp/38CorpApple_Environment_EN.txt.txt |     2 +
 .../txt/Corp/39CorpHandM_Quality_EN.txt.txt     |     2 +
 .../Corp/40CorpHandM_Responsibility_EN.txt.txt  |     2 +
 .../txt/Corp/41CorpHP_Objectives_EN.txt.txt     |     2 +
 .../Corp/42CorpJaguar_Sustainability_EN.txt.txt |     2 +
 .../txt/Corp/43CorpZara_Environment_EN.txt.txt  |     2 +
 .../txt/Fict/44FictAndersenH_Mermaid_EN.txt.txt |     2 +
 .../txt/Fict/45FictAndersenH_Ugly_EN.txt.txt    |     2 +
 .../txt/Fict/46FictBalzacH_Goriot_Ia_EN.txt.txt |     2 +
 .../txt/Fict/47FictBalzacH_Goriot_Ib_EN.txt.txt |     2 +
 .../Fict/48FictBronteC_JaneEyre_33_EN.txt.txt   |     2 +
 .../Fict/49FictBronteC_JaneEyre_6_EN.txt.txt    |     2 +
 .../Fict/50FictCarrollL_Wonderland_2_EN.txt.txt |     2 +
 .../Fict/51FictCarrollL_Wonderland_5_EN.txt.txt |     2 +
 .../52FictDostoyevskyF_CrimePun_ep_EN.txt.txt   |     2 +
 .../53FictDostoyevskyF_CrimePun_II2_EN.txt.txt  |     2 +
 .../txt/Fict/54FictDumasAp_3Musk_11_EN.txt.txt  |     2 +
 .../txt/Fict/55FictDumasAp_3Musk_2_EN.txt.txt   |     2 +
 .../Fict/56FictFlaubertG_Bovary_I5_EN.txt.txt   |     2 +
 .../Fict/57FictFlaubertG_Bovary_III3_EN.txt.txt |     2 +
 .../Fict/58FictFlaubertG_Salammbo_11_EN.txt.txt |     2 +
 .../Fict/59FictFlaubertG_Salammbo_2_EN.txt.txt  |     2 +
 .../Fict/60FictGrimmJ_Bremusicians_EN.txt.txt   |     2 +
 .../Fict/61FictGrimmJ_HanselGretel_EN.txt.txt   |     2 +
 .../Fict/62FictHugoV_Hunchback_II5_EN.txt.txt   |     2 +
 .../Fict/63FictHugoV_Hunchback_IV3_EN.txt.txt   |     2 +
 .../txt/Fict/64FictPoeE_Purloined_EN.txt.txt    |     2 +
 .../txt/Fict/65FictPoeE_Usher_EN.txt.txt        |     2 +
 .../Fict/66FictTolstoyL_AnnaK_I10_EN.txt.txt    |     2 +
 .../Fict/67FictTolstoyL_AnnaK_I11_EN.txt.txt    |     2 +
 .../Fict/68FictTolstoyL_WarPeace_I16_EN.txt.txt |     2 +
 .../Fict/69FictTolstoyL_WarPeace_I2_EN.txt.txt  |     2 +
 .../txt/Fict/70FictWildeO_Dorian_18_EN.txt.txt  |     2 +
 .../txt/Fict/71FictWildeO_Dorian_8_EN.txt.txt   |     2 +
 .../Fict/72FictZolaE_Germinal_II4_EN.txt.txt    |     2 +
 .../Fict/73FictZolaE_Germinal_VII2_EN.txt.txt   |     2 +
 .../txt/Marx/74MarxMarxK_ComMan_01_EN.txt.txt   |     2 +
 .../txt/Marx/75MarxMarxK_ComMan_24_EN.txt.txt   |     2 +
 .../txt/Marx/76MarxMarxK_Feuerbach_EN.txt.txt   |     2 +
 .../txt/Marx/77MarxMarxK_GothaProg_EN.txt.txt   |     2 +
 .../txt/Marx/78MarxMarxK_JewishQ_EN.txt.txt     |     2 +
 ...k_international_story_0,,631931,00.html.txt" |     2 +
 ....bbc.co.uk_2_hi_middle_east_5136988.stm.txt" |     2 +
 ...bbc.co.uk_2_hi_asia-pacific_2181151.stm.txt" |     2 +
 ...ews.bbc.co.uk_2_hi_business_2235352.stm.txt" |     2 +
 ..._money.guardian.co.uk_news__0,,,00.html.txt" |     2 +
 ...n.co.uk_Match_Report_0,,1536875,00.html.txt" |     2 +
 ...bbc.co.uk_2_hi_asia-pacific_5133220.stm.txt" |     2 +
 ..._news.bbc.co.uk_2_hi_europe_4263426.stm.txt" |     2 +
 ...ews.bbc.co.uk_2_hi_business_2769931.stm.txt" |     2 +
 ...o.uk_2_hi_uk_news_education_2191422.stm.txt" |     2 +
 ..._news.bbc.co.uk_2_hi_africa_2909769.stm.txt" |     2 +
 ...news.bbc.co.uk_2_hi_uk_news_2509561.stm.txt" |     2 +
 .../txt/News/79NewsEnnajiM_Maghreb_EN.txt.txt   |     2 +
 .../News/80NewsGoalcom_MessiTop50_EN.txt.txt    |     2 +
 .../txt/News/81NewsGueyeA_BlackPete_EN.txt.txt  |     2 +
 .../News/82NewsLeM_OrbanGoldmanSachs_EN.txt.txt |     2 +
 ...sMendesFrancoJ_HaitiBeyondCapital_EN.txt.txt |     2 +
 ...NewsMillerH_FrankensteinTradition_EN.txt.txt |     2 +
 .../85NewsRabinovichI_IranNuclear_EN.txt.txt    |     2 +
 .../txt/News/86NewsRian_IranCutsOil_EN.txt.txt  |     2 +
 .../87NewsRian_MedvedevDismisses_EN.txt.txt     |     2 +
 .../News/88NewsWiki_KaradzicArrest_EN.txt.txt   |     2 +
 .../txt/Opac/89OpacStallman_FreeSoft_EN.txt.txt |     2 +
 .../txt/Opac/90OpacTeam_Berlin_EN.txt.txt       |     2 +
 .../txt/Opac/91OpacTeam_Budapest_EN.txt.txt     |     2 +
 .../Tedi/100TediOConnellA_Quantum_EN.txt.txt    |     2 +
 .../txt/Tedi/101TediRoslingH_Poverty_EN.txt.txt |     2 +
 .../102TediSinclairC_OpenArchitech_EN.txt.txt   |     2 +
 .../92TediAndersonEt_NuclearEnergy_EN.txt.txt   |     2 +
 .../Tedi/93TediBelcherA_Batteries_EN.txt.txt    |     2 +
 .../txt/Tedi/94TediGellMannM_Lang_EN.txt.txt    |     2 +
 .../Tedi/95TediJakubowskiM_OpenTech_EN.txt.txt  |     2 +
 .../Tedi/96TediJordanC_ShockStats_EN.txt.txt    |     2 +
 .../txt/Tedi/97TediMcKeanE_Dict_EN.txt.txt      |     2 +
 .../txt/Tedi/98TediMillerA_News_EN.txt.txt      |     2 +
 .../Tedi/99TediNatchweyJ_WarPhotos_EN.txt.txt   |     2 +
 .../txt/Tele/103TeleHTC_Manual_0_EN.txt.txt     |     2 +
 .../txt/Tele/104TeleHTC_Manual_10_EN.txt.txt    |     2 +
 .../txt/Tele/105TeleHTC_Manual_11_EN.txt.txt    |     2 +
 .../txt/Tele/106TeleHTC_Manual_12_EN.txt.txt    |     2 +
 .../txt/Tele/107TeleHTC_Manual_13_EN.txt.txt    |     2 +
 .../txt/Tele/108TeleHTC_Manual_1_EN.txt.txt     |     2 +
 .../txt/Tele/109TeleHTC_Manual_2_EN.txt.txt     |     2 +
 .../txt/Tele/110TeleHTC_Manual_3_EN.txt.txt     |     2 +
 .../txt/Tele/111TeleHTC_Manual_4_EN.txt.txt     |     2 +
 .../txt/Tele/112TeleHTC_Manual_5_EN.txt.txt     |     2 +
 .../txt/Tele/113TeleHTC_Manual_6_EN.txt.txt     |     2 +
 .../txt/Tele/114TeleHTC_Manual_7_EN.txt.txt     |     2 +
 .../txt/Tele/115TeleHTC_Manual_8_EN.txt.txt     |     2 +
 .../txt/Tele/116TeleHTC_Manual_9_EN.txt.txt     |     2 +
 .../txt/Teli/117TeliApple_iPhone4_EN.txt.txt    |     2 +
 .../txt/Tels/118TelsGoog_Answer_0545_EN.txt.txt |     2 +
 .../txt/Tels/119TelsGoog_Answer_0a46_EN.txt.txt |     2 +
 .../txt/Tels/120TelsGoog_Answer_0bcf_EN.txt.txt |     2 +
 .../txt/Tels/121TelsGoog_Answer_1851_EN.txt.txt |     2 +
 .../txt/Tels/122TelsGoog_Answer_1b37_EN.txt.txt |     2 +
 .../txt/Tels/123TelsGoog_Answer_2feb_EN.txt.txt |     2 +
 .../txt/Tels/124TelsGoog_Answer_3024_EN.txt.txt |     2 +
 .../txt/Tels/125TelsGoog_Answer_4b5b_EN.txt.txt |     2 +
 .../txt/Tels/126TelsGoog_Answer_596f_EN.txt.txt |     2 +
 .../txt/Tels/127TelsGoog_Answer_5b4c_EN.txt.txt |     2 +
 .../txt/Tels/128TelsGoog_Answer_70d7_EN.txt.txt |     2 +
 .../txt/Tels/129TelsGoog_Answer_770f_EN.txt.txt |     2 +
 .../txt/Tels/130TelsGoog_Answer_91e4_EN.txt.txt |     2 +
 .../txt/Tels/131TelsGoog_Answer_94d0_EN.txt.txt |     2 +
 .../txt/Tels/132TelsGoog_Answer_a67e_EN.txt.txt |     2 +
 .../133UnitGass_Resolution_62250_EN.txt.txt     |     2 +
 .../txt/Unit/134UnitUnat_Charter_EN.txt.txt     |     2 +
 .../txt/Unit/135UnitUnat_HumanRights_EN.txt.txt |     2 +
 .../txt/Unit/136UnitUnat_StatuteICOJ_EN.txt.txt |     2 +
 ...indParaschivoiuEt_DarrieusTurbine_EN.txt.txt |     2 +
 .../Wind/138WindPurohitEt_CDMIndia_EN.txt.txt   |     2 +
 ...ndRiadhEt_ContrarotatingConverter_EN.txt.txt |     2 +
 ...te\" url=\"http:__vue.org.uk_carlos.htm.txt" |     2 +
 ...ived_game.asp?MatchID=89&Season=2002_03.txt" |     2 +
 ...thoholidays.co.uk_ShowDetails.asp?id=96.txt" |     2 +
 ...news.org.uk_news_news_detail.asp?nid=22.txt" |     2 +
 ...url=\"http:__www.benhs.org.uk_anex.html.txt" |     2 +
 ..._guides_guides.php?subject=ratsthatbite.txt" |     2 +
 ...es_selected_cases_PCA_sc9903_c682b.html.txt" |     2 +
 ...l.cam.ac.uk_call_translation_toolkit_6_.txt" |     2 +
 ..._nnr-scotland_news_detail.asp?newsID=79.txt" |     2 +
 ...c.uk_Conferences_BPCSR05_submission.htm.txt" |     2 +
 ...co.uk_flatfiles_paulpearcetributes.aspx.txt" |     2 +
 ...resence.co.uk_phpBB2_viewtopic.php?t=97.txt" |     2 +
 ...loan.co.uk_article_Mortgages-1212.shtml.txt" |     2 +
 ...www.brainbashers.co.uk_droodlesprev.asp.txt" |     2 +
 ...onics.org.uk_newsletter_NoticeBoard.php.txt" |     2 +
 ..."http:__www.snh.org.uk_calendar_jul.asp.txt" |     2 +
 ..._Northamptonshire_jobs-in-Brackley.html.txt" |     2 +
 ...timatefilm_chart_details.php?ranking=65.txt" |     2 +
 ....co.uk_online_pr_online_pr.ehtml?o=1647.txt" |     2 +
 ...ling_human_organ_donation_dilemma_.html.txt" |     2 +
 ...www.sscs.bham.ac.uk_phsi_eating_bmi.htm.txt" |     2 +
 ...elected_cases_HSC_IC0107_pt1-e2242.html.txt" |     2 +
 ...org.uk_index.asp?contentid=21&menuid=21.txt" |     2 +
 ...d\" url=\"http:__www.ebe.org.uk_ccn.htm.txt" |     2 +
 ...humberland.gov.uk_vg_text_northpen.html.txt" |     2 +
 ..._reports_1992_theatre_museum_index.html.txt" |     2 +
 ...easynet.co.uk_jim.shead_River-Arun.html.txt" |     2 +
 ...wson-cruttenden.co.uk_conveyancing.html.txt" |     2 +
 ...radio.co.uk_acatalog_Vert_Arno_Ant.html.txt" |     2 +
 ...pendent.co.uk_europe_article1192096.ece.txt" |     2 +
 ..."http:__www.nsbapty.co.uk_Supp-Samp.htm.txt" |     2 +
 ...cp=.._swt_&cg=_&sim=&id=487&pagetype=27.txt" |     2 +
 ...p:__www.surf4wine.co.uk_Eben_Sadie.html.txt" |     2 +
 ...8%2D10+09%3A45%3A00&Logo=0&sort=cp&pg=1.txt" |     2 +
 ...codyfi.org.uk_commfirstactionplanpr.htm.txt" |     2 +
 ...rl=\"http:__www.sefton.gov.uk_page&3630.txt" |     2 +
 ...ective.org.uk_dwboard_messages_112.html.txt" |     2 +
 ...rsale-investment.org.uk_Plot-Sales.html.txt" |     2 +
 ...rdirectory.co.uk_car-leasing-jamjar.htm.txt" |     2 +
 ...\"http:__www.nta.nhs.uk_news_020624.htm.txt" |     2 +
 ....ac.uk_law_lawpages_Victim_Support.html.txt" |     2 +
 ...strialnetworking.co.uk_mag_v7-2_p7.html.txt" |     2 +
 ...php?location=_news_archive_20040628.htm.txt" |     2 +
 ...:__www.idler.co.uk_archives_?page_id=18.txt" |     2 +
 ...t.co.uk_support_index.pl?page=mailboxes.txt" |     2 +
 ...archives_2005_11_backstagebbccou_2.html.txt" |     2 +
 ...rk.co.uk_Health_default.asp?article=135.txt" |     2 +
 ...ingexperience.org.uk_learning_first.php.txt" |     2 +
 ...rtle.co.uk_edfest2006_terrysaunders.htm.txt" |     2 +
 ...s.gov.uk_legal_section21_chapter_f.html.txt" |     2 +
 ...www.tropicalfishcentre.co.uk_Plants.htm.txt" |     2 +
 ...org.uk_bba_jsp_polopoly.jsp?d=155&a=493.txt" |     2 +
 ...__www.burpham.surrey.sch.uk_potter.html.txt" |     2 +
 ...yId=14542&taggingType=4&contentId=11208.txt" |     2 +
 ...o.uk_catalogue_walker_walkerreviews.htm.txt" |     2 +
 ...reports_Spain_andalucia6_and-oct-03.htm.txt" |     2 +
 ...rvicesindex.co.uk_newsletter_aug04.html.txt" |     2 +
 ...x.cfm?fuseaction=details&nNewsID=560539.txt" |     2 +
 ...k_artists_artistspage.php?ID=204&page=3.txt" |     2 +
 ...re.co.uk_buying-property-continent.html.txt" |     2 +
 ....co.uk_usability_articles_print_wud.asp.txt" |     2 +
 ...i_3736cbd2e5895cf49854f8d70494bae7.html.txt" |     2 +
 ...am.pwp.blueyonder.co.uk_cx500_oil_pump_.txt" |     2 +
 ...index.php?name=News&file=article&sid=34.txt" |     2 +
 ....org.uk_scgn_articles_9902_inbrief.html.txt" |     2 +
 ...e.gov.uk_tmbc6_cycling_withoutmycar.htm.txt" |     2 +
 ...ersonal_pages_Ifan1_Booth_Notebooks.htm.txt" |     2 +
 ...le_3603_reduce-cancer-reoccurrence.html.txt" |     2 +
 ...discovery.co.uk_job.aspx?jid=11535&cd=1.txt" |     2 +
 ...www.syscom.plc.uk_solutions_distrib.asp.txt" |     2 +
 ...ay.jsp?section=Banking&article_id=64923.txt" |     2 +
 ...olutec.co.uk_06_chairman.asp?thesub=6.0.txt" |     2 +
 ....truststfc.co.uk_meeting_27_09_2006.php.txt" |     2 +
 ...ition_personal_experiences_michael.html.txt" |     2 +
 ..._library_history_library_history_3.html.txt" |     2 +
 ...listunitynetwork.co.uk_news_g8jepps.htm.txt" |     2 +
 ...:__www.weirdwiltshire.co.uk_250703.html.txt" |     2 +
 ...idwifery_lsa-guidelines_maternal-deaths.txt" |     2 +
 ...+Ltd&frmBPE=&frmCD=N&mopt=dpe&dpid=2302.txt" |     2 +
 ..." url=\"http:__www.siba.co.uk_about.asp.txt" |     2 +
 ...ave_Nirvana_2_Mizuno_Running_Shoes.html.txt" |     2 +
 ...earch.org.uk_?lid=1944&tmpl=ddmainprint.txt" |     2 +
 ...departments_services_campusenvironment_.txt" |     2 +
 ...etails.asp?ED=Arts+and+Crafts&offset=66.txt" |     2 +
 ...t_DBID_17ea4c66d7bd2c0aeb4513c89cb01afd.txt" |     2 +
 ....fst.rdg.ac.uk_news-archive-2004-11.htm.txt" |     2 +
 ..._www.aslib.co.uk_training_careers_9.htm.txt" |     2 +
 ...profile_Can-I-spend-time-on-an-elective.txt" |     2 +
 ...comesdata.co.uk_europe_duediligence.htm.txt" |     2 +
 ...nnine.demon.co.uk_NPC_1982_MEXICOSP.HTM.txt" |     2 +
 ...p:__www.hsl.gov.uk_publications_car.htm.txt" |     2 +
 ...w.baronage.co.uk_bphtm-01_const-02.html.txt" |     2 +
 ...14C83960FBA06562FFBA3B67013B5558FE96AD7.txt" |     2 +
 ...myleedsjobs.co.uk_jobdetails-11834.html.txt" |     2 +
 ...p:__www.paperairplanes.co.uk_orplan.php.txt" |     2 +
 ...\"http:__www.eca.ac.uk_tacitus_news.htm.txt" |     2 +
 ...casino-avenue.co.uk_2004_06_duuuuh.html.txt" |     2 +
 ...uk_lifeevent_penret_penreform_5_reg.asp.txt" |     2 +
 ...earch.co.uk_projects_T5_excavation.html.txt" |     2 +
 ...ww.redcross.org.uk_section.asp?id=49633.txt" |     2 +
 ...orum.org.uk_jobs_forestsmonitor2001.htm.txt" |     2 +
 ...um.org.uk_kids_detail.asp?ContentID=189.txt" |     2 +
 ...hetelly.co.uk_interviews_markwright.htm.txt" |     2 +
 ...__www.sitcom.co.uk_tlc_characters.shtml.txt" |     2 +
 ...ampian.co.uk_whiskycountry_ess_walk.htm.txt" |     2 +
 ...arty.org.uk_sgpnewsarticle20051222a.htm.txt" |     2 +
 ...ployment-solicitors.co.uk_Employer1.htm.txt" |     2 +
 ....uk_news_Food_Sounds_So_Good_at_DArcys_.txt" |     2 +
 ...ttp:__www.sweetsforu.co.uk_shipping.php.txt" |     2 +
 ...rg.uk_Listings.aspx?index=387&item=2929.txt" |     2 +
 ....uk_product.aspx?catno=53&prod=HCAA6241.txt" |     2 +
 ....uk_main_en_att-provider-ROMA_6913.html.txt" |     2 +
 ...rt-works.org.uk_artworks_z030703b.shtml.txt" |     2 +
 ...lier_booth_miscellaneous_wristgang.html.txt" |     2 +
 ...w.changingdiabetes.co.uk_view.asp?ID=92.txt" |     2 +
 ...ets.co.uk_las%20new%20bionix%20page.htm.txt" |     2 +
 ...=37&sql=&sortup=sorttitle&bookstatus=OK.txt" |     2 +
 ...org.uk_cheshire_proj_harvest_survey.htm.txt" |     2 +
 ...emydown.co.uk_clancomments.php?id=35113.txt" |     2 +
 ...p:__www.scis.org.uk_search_menu_new.asp.txt" |     2 +
 ...ckbankmembership_clickbankprotector.htm.txt" |     2 +
 ...lewa.co.uk_project5_teachers_T5-0-1.htm.txt" |     2 +
 ...dex.php?category=campaigns&c=i&uid=2130.txt" |     2 +
 ...ww.kent-ccc.co.uk_news_story.php?id=660.txt" |     2 +
 ...t.asp?WCI=SiteHome&ID=9908&PageID=56638.txt" |     2 +
 ...k_jsp_id_0340894342_Divine_Madness.html.txt" |     2 +
 ...:__www.newble.co.uk_chalmers_innes.html.txt" |     2 +
 ....searchenginespy.co.uk_article0027.html.txt" |     2 +
 ...pus_search_document.php?documentid=1211.txt" |     2 +
 ...lyheritage_forum_topic.asp?TOPIC_ID=26&.txt" |     2 +
 ...andhousesforsalelimousinabn0509263.html.txt" |     2 +
 ...cedevelopment_managerialfacilities.aspx.txt" |     2 +
 ...fety_various_rass_kmweb_safety_msds.htm.txt" |     2 +
 ...in.co.uk_index.php?id=23&L=3&article=13.txt" |     2 +
 ...rAreaNo=1053,2011&strKeyword=PS2006_4_3.txt" |     2 +
 ...tp:__www.itreviews.co.uk_games_g232.htm.txt" |     2 +
 ...co.uk_2002_09_to-quote-the-four-seasons.txt" |     2 +
 ...lt.aspx?group_id=16538&article_id=21979.txt" |     2 +
 ...arch_staff_jpf_papers_paper26_index.php.txt" |     2 +
 ..._www.all-energy.co.uk_newsletter45.html.txt" |     2 +
 ...rker.org.uk_article.php?article_id=8138.txt" |     2 +
 ....setdanceteacher.co.uk_newmarketmez.htm.txt" |     2 +
 ..._interpro_DisplayIproEntry?ac=IPR002824.txt" |     2 +
 ...www.garthyfog.co.uk_mawddach_valley.htm.txt" |     2 +
 ...aving_expeditions_jura05_jura_circ1.php.txt" |     2 +
 ...a-online.co.uk_viewnews.cfm?news_id=177.txt" |     2 +
 ...omPage.aspx?PageID=24163&sectionID=4585.txt" |     2 +
 ...ews_news_search.php?search=&start=12080.txt" |     2 +
 ...linc4info.org.uk_cms_pages_sitemap.html.txt" |     2 +
 ....ccp4.ac.uk_courses_IUCr2005_index.html.txt" |     2 +
 ...ve-engineering.co.uk_html_training.html.txt" |     2 +
 ..." url=\"http:__www.lanpac.co.uk_csi.php.txt" |     2 +
 ...org.uk_html_files_501_project_info.html.txt" |     2 +
 ...s.co.uk_showpage.asp?showdocumentid=196.txt" |     2 +
 ....ac.uk_star_docs_sun232.htx_node17.html.txt" |     2 +
 ..._cmhansrd_vo000405_debtext_00405-07.htm.txt" |     2 +
 ...RESBYTERY%20OF%20PENPONT%20p.%20672.htm.txt" |     2 +
 ...w.wessingtoncryogenics.co.uk_serv01.htm.txt" |     2 +
 ..._content_index.jsp?contentid=1999276669.txt" |     2 +
 ...tratford-upon-avon.co.uk_static_481.htm.txt" |     2 +
 ...group.co.uk_office-support-recruitment_.txt" |     2 +
 ...talog_product_info.php?products_id=2981.txt" |     2 +
 ..._fourwheels_formula1_article.asp?a=1327.txt" |     2 +
 ...id_docs_api_javax_swing_JSplitPane.html.txt" |     2 +
 ...online.co.uk_60_66_67_articles_7335.php.txt" |     2 +
 ...ww.iae.co.uk_news_designedforthejob.htm.txt" |     2 +
 ...uk_Case_Studies_studies_Ford3_Ford3.asp.txt" |     2 +
 ...ffee.co.uk_product.php?xProd=21&xSec=22.txt" |     2 +
 ...usability_aboutus_usability_aboutus.asp.txt" |     2 +
 ...d_story.asp?latestchapter=12&subarea=11.txt" |     2 +
 ...tt.co.uk_pages_searchdetails.asp?ID=776.txt" |     2 +
 ...forum.org.uk_userpage1.cfm?item_id=1913.txt" |     2 +
 ...choolhouse.org.uk_law_not_enrolled.html.txt" |     2 +
 ...lendar_archive_article.cfm?articleId=52.txt" |     2 +
 ...tp:__www.lathes.co.uk_beaver_page5.html.txt" |     2 +
 ...ww.hasslefreeminiatures.co.uk_rules.php.txt" |     2 +
 ...k_modules_event_viewevent.php?eveid=109.txt" |     2 +
 ...-cheats_Gamecube-(hardware)-Cheats.html.txt" |     2 +
 ...www.omega.co.uk_ppt_pptsc.asp?ref=LE902.txt" |     2 +
 ...rum_poster.cfm?sort=creatasc&poster=101.txt" |     2 +
 ....uk_directory_prof_issues_blreview.html.txt" |     2 +
 ...d_to_back_london's_olympic_2012_bid.htm.txt" |     2 +
 ...rum=106&thread=9757638&message=11724737.txt" |     2 +
 ...51_20553_100_10012_10010_category_10010.txt" |     2 +
 ....co.uk_news_industry_2005_ind505016.htm.txt" |     2 +
 ...uea.ac.uk_eas_events_litfestspr04.shtml.txt" |     2 +
 ...imberry.co.uk_Dotnetlectures_Index.aspx.txt" |     2 +
 ....lathes.co.uk_wolfjahnmiller_page2.html.txt" |     2 +
 ...te_cms_newsarticleview.asp?article=2173.txt" |     2 +
 ...ttp:__www.anweb.co.uk_l_04_c3_c3a10.htm.txt" |     2 +
 ...subject_modernlanguages_course_s1s2.htm.txt" |     2 +
 ...p:__www.law.warwick.ac.uk_ltj_4-1m.html.txt" |     2 +
 ...shipfoundation.org.uk_main_news.php?n20.txt" |     2 +
 ...ttp:__www.arnside-online.co.uk_care.htm.txt" |     2 +
 ...ort.co.uk_products_leatherman_micra.php.txt" |     2 +
 ...g.uk_columnists_docdiary2.php?docId=103.txt" |     2 +
 ...ne.co.uk_article_articleview_1733_1_153.txt" |     2 +
 ...-survey.pwp.blueyonder.co.uk_P_mugo.htm.txt" |     2 +
 ....free-internet.co.uk_email_sendmail.htm.txt" |     2 +
 ...esources_ums_PythonDoc_api_threads.html.txt" |     2 +
 ...l?in_article_id=405873&in_page_id=50002.txt" |     2 +
 ...rvices.gcal.ac.uk_synergy_03_scwbl.html.txt" |     2 +
 ..._BF_NEWSART_view.asp?Q=BF_NEWSART_95582.txt" |     2 +
 ...ware.co.uk_printpage.asp?REF=_group.asp.txt" |     2 +
 ...__www.dba.org.uk_aboutdba_chriswood.asp.txt" |     2 +
 ...ulture_books_0804_110804_food_books.htm.txt" |     2 +
 ..._www.newtsnni.gov.uk_actionplan_04b.htm.txt" |     2 +
 ..._KS3_databases_relational_databases.htm.txt" |     2 +
 ...on_pages_SchemesofWork_KS4_skillswl.htm.txt" |     2 +
 ...en_unusual-gadgets_mood-light-tile.html.txt" |     2 +
 ...cle&sid=175&mode=thread&order=0&thold=0.txt" |     2 +
 ..."http:__www.engender.org.uk_justice.htm.txt" |     2 +
 ...:__www.heros.org.uk_home_sub.asp?page=2.txt" |     2 +
 ...rg.uk_clinical_prac_mar_05_mar05_08.htm.txt" |     2 +
 ...ww.cdp.bham.ac.uk_About_CDP_methods.htm.txt" |     2 +
 ...k_caminfo_blueprint_articles.asp?ID=807.txt" |     2 +
 ...islation_scotland_acts2002_20017--b.htm.txt" |     2 +
 ...ures.co.uk_dog-pictures_shiba_inu.shtml.txt" |     2 +
 ...wbale-building.co.uk_index.php?page=faq.txt" |     2 +
 ...co.uk_londonunderuk_tfl_our_careers.asp.txt" |     2 +
 ...www.environment.bham.ac.uk_extindex.htm.txt" |     2 +
 ...bitions_Aug%2006%2004_James%20Cauty.htm.txt" |     2 +
 ...__www.sincuser.f9.co.uk_050_lastwrd.htm.txt" |     2 +
 ...__www.ocdaction.org.uk_skin-picking.htm.txt" |     2 +
 ..."http:__www.greenparty.org.uk_news_2033.txt" |     2 +
 ...ed.ac.uk_linguist_issues_17_17-229.html.txt" |     2 +
 ...s_aRelease.asp?akey=2026&Mon=01_07_2004.txt" |     2 +
 ...rl=\"http:__www.lpt.nhs.uk_service5.php.txt" |     2 +
 ....uk_resources_reportdetails.asp?id=1039.txt" |     2 +
 ...ctech.co.uk_siemens_hosted_exchange.htm.txt" |     2 +
 ...k_archives_2006_01_chomsky_intervi.html.txt" |     2 +
 .../0syndicate_bmw-s-and-chinese-justice.txt    |     2 +
 .../synd/10syndicate_france-s-fourth-moment.txt |     2 +
 ...ong-idea-of-france-by-brigitte-granville.txt |     2 +
 ...2syndicate_hollywood-s-favorite-villains.txt |     2 +
 ...te_japanese-lessons-for-china-s-currency.txt |     2 +
 ...rence-from-cuba-to-iran-by-joseph-s--nye.txt |     2 +
 ...yndicate_merkel-in-china-by-sanjaya-baru.txt |     2 +
 ...16syndicate_ms--europe-or-frau-germania-.txt |     2 +
 ...pply-chain-by-andrew-sheng-and-geng-xiao.txt |     2 +
 ...te_russia-s-ukrainian-path-to-the-future.txt |     2 +
 ...-europe-regulate-sovereign-wealth-funds-.txt |     2 +
 ...ndicate_china-and-a-new-balance-of-power.txt |     2 +
 ...ate_the--browning--of-african-technology.txt |     2 +
 .../txt/synd/21syndicate_the-china-question.txt |     2 +
 ...icate_the-end-of-the-russia-china-debate.txt |     2 +
 ...cate_the-making-of-china-s-trade-deficit.txt |     2 +
 ...e_the-return-of-franco-german-leadership.txt |     2 +
 .../25syndicate_the-transatlantic-index.txt     |     2 +
 .../26syndicate_will-russia-save-the-west-.txt  |     2 +
 ...ina-and-russia-in-the-new-world-disorder.txt |     2 +
 ...japan-protests-islands-by-liah-greenfeld.txt |     2 +
 .../synd/4syndicate_china-s-cyber-warriors.txt  |     2 +
 .../synd/5syndicate_china-s-gift-to-europe.txt  |     2 +
 .../6syndicate_china-s-threat-to-russia.txt     |     2 +
 ...s-vital-french-connection-by-kemal-dervi.txt |     2 +
 .../synd/8syndicate_exchange-rate-disorder.txt  |     2 +
 ...cate_france-and-germany-must-act-in-iraq.txt |     2 +
 ...ted_a_j_jacobs_year_of_living_biblically.txt |     2 +
 ...n_kay_shares_a_powerful_idea_about_ideas.txt |     2 +
 ...d_alex_tabarrok_foresees_economic_growth.txt |     2 +
 .../txt/ted/143ted_amy_tan_on_creativity.txt    |     2 +
 ..._barry_schwartz_on_the_paradox_of_choice.txt |     2 +
 ...njamin_wallace_on_the_price_of_happiness.txt |     2 +
 .../146ted_bono_s_call_to_action_for_africa.txt |     2 +
 ...147ted_dan_dennett_cute_sexy_sweet_funny.txt |     2 +
 ...ents_a_better_way_to_harvest_bone_marrow.txt |     2 +
 ...es_his_ted_prize_wish_once_upon_a_school.txt |     2 +
 .../150ted_don_norman_on_design_and_emotion.txt |     2 +
 ...goodwin_on_learning_from_past_presidents.txt |     2 +
 ...elliot_krane_the_mystery_of_chronic_pain.txt |     2 +
 ..._zeisel_on_the_playful_search_for_beauty.txt |     2 +
 ...x_moonie_diane_benscoter_how_cults_think.txt |     2 +
 .../txt/ted/155ted_hans_rosling_at_state.txt    |     2 +
 ..._helen_fisher_tells_us_why_we_love_cheat.txt |     2 +
 ...ed_isabel_allende_tells_tales_of_passion.txt |     2 +
 ...8ted_jessi_arrington_wearing_nothing_new.txt |     2 +
 ...nderkoffler_drive_3d_data_with_a_gesture.txt |     2 +
 ...joseph_lekuton_tells_a_parable_for_kenya.txt |     2 +
 .../ted/161ted_julia_sweeney_has_the_talk.txt   |     2 +
 ...en_robinson_says_schools_kill_creativity.txt |     2 +
 .../txt/ted/163ted_marcin_jakubowski.txt        |     2 +
 ...h_10_things_you_didn_t_know_about_orgasm.txt |     2 +
 ...sandel_the_lost_art_of_democratic_debate.txt |     2 +
 ...hael_shermer_on_believing_strange_things.txt |     2 +
 ...lenny_investigates_global_crime_networks.txt |     2 +
 ...at_hallucination_reveals_about_our_minds.txt |     2 +
 ...cca_saxe_how_brains_make_moral_judgments.txt |     2 +
 .../ted/170ted_robert_thurman_on_compassion.txt |     2 +
 ...y_sutherland_life_lessons_from_an_ad_man.txt |     2 +
 ...d_sean_gourley_on_the_mathematics_of_war.txt |     2 +
 ...s_how_he_found_the_true_face_of_leonardo.txt |     2 +
 ...wolfram_computing_a_theory_of_everything.txt |     2 +
 .../txt/ted/175ted_tom_wujec_build_a_tower.txt  |     2 +
 ...ujec_on_3_ways_the_brain_creates_meaning.txt |     2 +
 .../txt/ted/177ted_william_li.txt               |     2 +
 ...pany-wizard.co.uk_SEO_Link-Exchange.htm.txt" |     2 +
 ...org.uk_newspub_story.cfm?id=353&sid=123.txt" |     2 +
 ...ngconcern.org.uk_news_news_scots06.html.txt" |     2 +
 ..._in_the_Community_Joan_Wolstenholme.htm.txt" |     2 +
 ...t.asp?WCI=SiteHome&ID=4337&PageID=21591.txt" |     2 +
 ...ha.freeserve.co.uk_glh_233_mckenna.html.txt" |     2 +
 .../txt/un/A/178un_A_AC252_L13.txt              |     2 +
 .../txt/un/A/179un_A_C3_61_L17.txt              |     2 +
 .../txt/un/A/180un_APLC_MSP2_2000_1.txt         |     2 +
 .../txt/un/C/181un_CCW_APII_CONF4_SR2.txt       |     2 +
 .../style_recognizer/txt/un/C/182un_CD_1605.txt |     2 +
 .../style_recognizer/txt/un/C/183un_CD_1606.txt |     2 +
 .../style_recognizer/txt/un/C/184un_CD_1672.txt |     2 +
 .../txt/un/C/185un_CD_PV1080.txt                |     2 +
 .../txt/un/C/186un_CD_PV1157.txt                |     2 +
 .../txt/un/C/187un_CD_PV857.txt                 |     2 +
 .../txt/un/C/188un_CD_PV861.txt                 |     2 +
 .../txt/un/C/189un_CEDAW_C_2002_II_3_ADD2.txt   |     2 +
 .../txt/un/C/190un_CES_SEM52_2.txt              |     2 +
 .../txt/un/C/191un_CES_SEM52_3.txt              |     2 +
 .../style_recognizer/txt/un/C/192un_CLCS_34.txt |     2 +
 .../txt/un/C/193un_CRC_GC_2003_4.txt            |     2 +
 .../txt/un/D/194un_DP_2002_34.txt               |     2 +
 .../txt/un/E/195un_E_C16_2002_2.txt             |     2 +
 .../txt/un/E/196un_E_CN17_2002_PC2_19.txt       |     2 +
 .../txt/un/E/197un_E_CN3_2003_25.txt            |     2 +
 .../txt/un/E/198un_E_CN4_2003_G_12.txt          |     2 +
 .../txt/un/E/199un_E_CN4_2004_G_21.txt          |     2 +
 .../txt/un/F/200un_FCCC_SBI_2002_15.txt         |     2 +
 .../txt/un/F/201un_FCCC_SBI_2002_9.txt          |     2 +
 .../txt/un/F/202un_FCCC_SBI_2004_L27.txt        |     2 +
 .../txt/un/H/203un_HRI_CORE_1_ADD117.txt        |     2 +
 .../txt/un/H/204un_HRI_CORE_1_ADD123.txt        |     2 +
 .../txt/un/H/205un_HSP_GC_19_2_ADD2.txt         |     2 +
 .../style_recognizer/txt/un/I/206un_IDB25_6.txt |     2 +
 .../txt/un/I/207un_IDB35_13.txt                 |     2 +
 .../txt/un/I/208un_ISBA_8_A_1.txt               |     2 +
 .../txt/un/N/209un_NPT_CONF2005_PCI_10.txt      |     2 +
 .../txt/un/P/210un_PBC24_1_ADD1.txt             |     2 +
 .../txt/un/P/211un_PBC24_4-IDB35_4.txt          |     2 +
 .../txt/un/P/212un_PBC_2_SLE_9.txt              |     2 +
 .../txt/un/P/213un_PCNICC_2000_1_ADD1.txt       |     2 +
 .../txt/un/P/214un_PCNICC_2001_WGAPIC_L1.txt    |     2 +
 .../txt/un/S/215un_S_2000_475.txt               |     2 +
 .../txt/un/S/216un_S_2000_817.txt               |     2 +
 .../txt/un/S/217un_S_2001_1038-A_56_521.txt     |     2 +
 .../txt/un/S/218un_S_2001_1039.txt              |     2 +
 .../txt/un/S/219un_S_2001_1317.txt              |     2 +
 .../txt/un/S/220un_S_2001_70-A_55_740.txt       |     2 +
 .../txt/un/S/221un_S_2001_947.txt               |     2 +
 .../txt/un/S/222un_S_2002_1141.txt              |     2 +
 .../txt/un/S/223un_S_2002_1180.txt              |     2 +
 .../txt/un/S/224un_S_2002_238-A_56_858.txt      |     2 +
 .../txt/un/S/225un_S_2002_313-A_56_888.txt      |     2 +
 .../txt/un/S/226un_S_2002_573-A_56_960.txt      |     2 +
 .../txt/un/S/227un_S_2003_326-A_ES-10_222.txt   |     2 +
 .../txt/un/S/228un_S_2006_490.txt               |     2 +
 .../txt/un/S/229un_S_2009_208-A_63_820.txt      |     2 +
 .../txt/un/S/230un_SAICM_ICCM2_INF_18.txt       |     2 +
 .../txt/un/S/231un_S_PV4596.txt                 |     2 +
 .../txt/un/S/232un_S_PV4646.txt                 |     2 +
 .../txt/un/S/233un_S_PV4684RESUMPTION1.txt      |     2 +
 .../txt/un/S/234un_S_PV4736.txt                 |     2 +
 .../txt/un/S/235un_S_PV4903.txt                 |     2 +
 .../txt/un/S/236un_S_PV4944.txt                 |     2 +
 .../txt/un/S/237un_S_PV5627.txt                 |     2 +
 .../txt/un/S/238un_ST_SGB_2002_9.txt            |     2 +
 .../txt/un/T/239un_TD_B_49_11.txt               |     2 +
 .../txt/un/T/240un_TD_B_49_SC2_L1.txt           |     2 +
 .../txt/un/T/241un_TD_B_COM3_54.txt             |     2 +
 .../txt/un/T/242un_TD_B_EX28_L1.txt             |     2 +
 .../txt/un/T/243un_TESTGVADUTY001.txt           |     2 +
 .../txt/un/U/244un_UNEP_GC_22_8_ADD2.txt        |     2 +
 .../txt/un/U/245un_UNEP_GC_24_2.txt             |     2 +
 .../txt/un/U/246un_UNEP_POPS_INC6_22.txt        |     2 +
 ...r.ac.uk_sppa_sppa_responsibilities.html.txt" |     2 +
 ...tomer_terms_index.omp?cid=1146151223400.txt" |     2 +
 ...Cmach_Backissues_j005_Articles_hall.htm.txt" |     2 +
 ...k_archives_2006_06_met_chief_could.html.txt" |     2 +
 ...:__www.fenews.co.uk_newsview.asp?n=1014.txt" |     2 +
 ..._north_east_petscorner_news_140106.html.txt" |     2 +
 ...tp:__www.nursingbra.co.uk_emma_jane.htm.txt" |     2 +
 ..._care_worcestershire_easemore_road.html.txt" |     2 +
 ...k_comedy_comedians_comedian.aspx?id=429.txt" |     2 +
 ...ttp:__www.seequality.org.uk_gender.html.txt" |     2 +
 .../src/test/resources/taxonomies/fidelity.txt  |   503 +
 .../resources/taxonomies/musicTaxonomyRoot.csv  |     1 +
 .../tree_kernel/action-plan-doc-list.csv        |   119 +
 .../src/test/resources/tree_kernel/model.txt    | 10143 ++++++++++
 .../test/resources/tree_kernel/svm.run.mac.zip  |   Bin 0 -> 90804 bytes
 .../src/test/resources/tree_kernel/svm_classify |   Bin 0 -> 257356 bytes
 .../test/resources/tree_kernel/svm_classify.exe |   Bin 0 -> 136343 bytes
 .../resources/tree_kernel/svm_classify.linux    |   Bin 0 -> 275767 bytes
 .../src/test/resources/tree_kernel/svm_learn    |   Bin 0 -> 336068 bytes
 .../test/resources/tree_kernel/svm_learn.exe    |   Bin 0 -> 147778 bytes
 .../src/test/resources/tree_kernel/training.txt | 16350 +++++++++++++++++
 .../tree_kernel/training_pos_neg_sentiment.txt  | 15930 ++++++++++++++++
 615 files changed, 61149 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/1f97041b/opennlp-similarity/README.md
----------------------------------------------------------------------
diff --git a/opennlp-similarity/README.md b/opennlp-similarity/README.md
new file mode 100644
index 0000000..65ccfdd
--- /dev/null
+++ b/opennlp-similarity/README.md
@@ -0,0 +1,157 @@
+# OpenNLP.Similarity Component
+
+It is a project under Apache OpenNLP which subjects results of parsing, part-of-speech tagging and rhetoric parsing to machine learning.
+It is leveraged in search, content generation & enrichment, chat bots and other text processing domains where relevance assessment task is a key.
+
+## What is OpenNLP.Similarity?
+
+OpenNLP.Similarity is an NLP engine which solves a number of text processing and search tasks based on OpenNLP and Stanford NLP parsers. It is designed to be used by a non-linguist software engineer to build linguistically-enabled: 
+<ul>
+<li>search engines</li>
+<li>recommendation systems</li>
+<li>dialogue systems</li>
+<li>text analysis and semantic processing engines</li>
+<li>data-loss prevention system</li>
+<li>content & document generation tools</li>
+<li>text writing style, authenticity, sentiment, sensitivity to sharing recognizers</li>
+<li>general-purpose deterministic inductive learner equipped with abductive, deductive and analogical reasoning which also embraces concept learning and tree kernel learning. </li>
+</ul>
+
+OpenNLP similarity provides a series of techniques to support the overall content pipeline, from text collection to cleaning, classification, personalization and distribution. Technology and implementation of content pipeline developed at eBay is described [here](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/examples/ContentPipeline.pdf). 
+## Installation
+ 0) Do [`git clone`](https://github.com/bgalitsky/relevance-based-on-parse-trees.git) to setup the environment including resources. Besides what you get from git, `/resources` directory requires some additional work:
+ 
+ 1) Download the main [jar](https://github.com/bgalitsky/relevance-based-on-parse-trees/blob/master/opennlp-similarity.11.jar).
+ 
+ 2) Set all necessary jars in /lib folder. Larger size jars are not on git so please download them from [Stanford NLP site](http://nlp.stanford.edu/)
+ <li>edu.mit.jverbnet-1.2.0.jar</li>
+ <li>ejml-0.23.jar</li>
+ <li>joda-time.jar</li>
+ <li>jollyday.jar</li>
+ <li>stanford-corenlp-3.5.2-models.jar</li>
+ <li>xom.jar</li>
+ The rest of jars are available via maven.
+ 
+ 3) Set up src/test/resources directory
+ - new_vn.zip needs to be unzipped
+ - OpenNLP models need to be downloaded into the directory 'models' from [here](http://opennlp.sourceforge.net/models-1.5/)
+  
+  As a result the following folders should be in /resources:
+  As obtained [from git](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/src/test/resources):
+ <li>/new_vn (VerbNet)</li>
+ <li>/maps (some lookup files such as products, brands, first names etc.)</li>
+ <li>/external_rst (examples of import of rhetoric parses from other systems)</li>
+ <li>/fca (Formal Concept Analysis learning)</li>
+ <li>/taxonomies (for search support, taxonomies are auto-mined from the web)</li>
+ <li>/tree_kernel (for tree kernel learning, representation of parse trees, thickets and trained models)</li>
+  Manual downloading is also required for:
+  <li>/new_vn</li>
+  <li>/w2v (where word2vector model needs to be downloaded, if desired)</li>
+  
+ 4) Try running tests which will give you a hint on how to integrate OpenNLP.Similarity functionality into your application. You can start with [Matcher test](https://github.com/bgalitsky/relevance-based-on-parse-trees/blob/949bac8c2a41c21a1e54fec075f2966d693114a4/src/test/java/opennlp/tools/parse_thicket/matching/PTMatcherTest.java) and observe how long paragraphs can be linguistically matched (you can compare this with just an intersection of keywords)
+  
+ 5) Look at [example POMs](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/examples) for how to better integrate into your existing project
+  
+## Creating a simple project
+
+  Create a project from [MyMatcher.java](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/examples/MyMatcher.java).  
+ 
+## Engines and Systems of OpenNLP.Similarity
+
+### Main relevance assessment function
+It takes two texts and returns the cardinality of the maximum common subgraph of the graph representations of these texts. This measure is supposed to be much more accurate than keyword statistics or compositional semantic models such as word2vec, because linguistic structure is taken into account, not just co-occurrences of keywords. 
+  [Matching class](https://github.com/bgalitsky/relevance-based-on-parse-trees/blob/master/src/main/java/opennlp/tools/parse_thicket/matching/Matcher.java) in [matching package](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/src/main/java/opennlp/tools/parse_thicket/matching) has 
+
+`List<List<ParseTreeChunk>> assessRelevance(String para1, String para2)`
+
+function which returns the list of common phrases between these paragraphs.
+
+To avoid re-parsing the same strings and improve the speed, use
+
+`List<List<ParseTreeChunk>> assessRelevanceCache(String para1, String para2)`
+
+It operates on the level of sentences (giving [maximal common subtree](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/examples/Inferring_sem_prop_of_sentences.pdf)) and paragraphs (giving maximal common [sub-parse thicket](https://en.wikipedia.org/wiki/Parse_Thicket)). Maximal common sub-parse thicket is also represented as a [list of common phrases](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/examples/MachineLearningSyntParseTreesGalitsky.pdf).
+
+<li>Search results re-ranker based on linguistic similarity</li>
+<li>Request Handler for SOLR which used parse tree similarity</li>
+
+### Search engine
+The following set of functionalities is available to enable search with linguistic features. It is desirable when query is long (more than 4 keywords), logically complex, ambiguous or 
+<li>Search results re-ranker based on linguistic similarity</li>
+<li>Request Handler for SOLR which used parse tree similarity</li>
+<li>Taxonomy builder via learning from the web</li>
+<li>Appropriate rhetoric map of an answer verifier. If parts of the answer are located in distinct discourse units, this answer might be irrelevant even if all keywords are mapped</li>
+<li>Tree kernel learning re-ranker to improve search relevance within a given domain with pre-trained model</li>
+
+SOLR request handlers are available [here](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/src/main/java/opennlp/tools/similarity/apps/solr)
+
+Taxonomy builder is [here](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/src/main/java/opennlp/tools/similarity/apps/taxo_builder).
+ Examples of pre-built taxonomies are available in [this directory](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/src/test/resources/taxonomies). Please pay attention to taxonomies built for languages other than English. A [music taxonomy](https://github.com/bgalitsky/relevance-based-on-parse-trees/blob/master/src/test/resources/taxonomies/musicTaxonomyRoot.csv) is an example of the seed data for taxonomy building, and [this taxonomy hashmap dump](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/src/main/java/opennlp/tools/similarity/apps/taxo_builder/taxonomy.txt) is a good example of what can be automatically constructed. A paper on taxonomy learning is [here](https://github.com/bgalitsky/relevance-based-on-parse-trees/blob/master/examples/taxonomyBuilder.pdf). 
+ 
+#### Search results re-ranker
+Re-ranking scores similarity between a given `orderedListOfAnswers` and  `question`
+
+  `List<Pair<String,Double>> pairList = new ArrayList<Pair<String,Double>>();`
+  
+  `for (String ans: orderedListOfAnswers) {`
+  
+            `List<List<ParseTreeChunk>> similarityResult = m.assessRelevanceCache(question, ans);`
+            
+            `double score = parseTreeChunkListScorer.getParseTreeChunkListScoreAggregPhraseType(similarityResult);`
+            
+            `Pair<String,Double> p = new Pair<String, Double>(ans, score);`
+            
+            `pairList.add(p);`
+            
+        `}`
+        
+   `Collections.sort(pairList, Comparator.comparing(p -> p.getSecond()));`
+   
+   `pairList` is then ranked according to the linguistic relevance score. This score can be combined with other sources such as popularity, geo-proximity and others.
+
+### Content generator
+ It takes a topic, builds a taxonomy for it and forms a table of content. It then  mines the web for documents for each table of content item, finds relevant sentences and paragraphs and merges them into a document [package](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/src/main/java/opennlp/tools/similarity/apps). The resultant document has a TOC, sections, figures & captions and also a reference section. We attempt to reproduce how humans cut-and-paste content from the web while writing on a topic. 
+  Content generation has a [demo](http://37.46.135.20/)  and to run it from IDE start [here](https://github.com/bgalitsky/relevance-based-on-parse-trees/blob/master/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorRunner.java). Examples of written documents are [here](http://37.46.135.20/wrt_latest/).
+  Another content generation option is about opinion data. Reviews are mined, cross-bred and made "original" for search engines. This and general content generation is done for SEO purposes. [Review builder](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/src/main/java/opennlp/tools/apps/review_builder/ReviewBuilderRunner.java) composes fake reviews which in turn should be recognized by a Fake Review detector.
+
+### Text classifier / feature detector in text
+The [classifier code](https://github.com/bgalitsky/relevance-based-on-parse-trees/blob/master/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelBasedClassifierMultiplePara.java) is the same but the [model files](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/src/test/resources/tree_kernel/TRAINING) vary for the applications below:
+<li>detect security leaks
+<li>detect argumentation
+<li>detect low cohesiveness in text
+<li>detect authors' doubt and low confidence
+<li>detect fake review
+
+Document classification to six major classes {finance, business, legal, computing, engineering, health} is available via [nearest neighbor model](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/src/main/java/opennlp/tools/doc_classifier/DocClassifier.java). A Lucene training model (1G file) is obtained from Wikipedia corpus. This classifier can be trained for arbitrary classes once respective Wiki pages are selected and respective [Lucene index is built](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/src/main/java/opennlp/tools/doc_classifier/ClassifierTrainingSetIndexer.java). Once proper training documents are selected from Wikipedia with adequate coverage, the accuracy is usually higher than what can be achieved by word2vec classification models.
+
+### General-purpose [deterministic inductive learner](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/src/main/java/opennlp/tools/jsmlearning) implements JS Mills method of induction and abduction (deduction is also partially implemented).
+
+ Inductive learning implemented as a base for syntactic tree-based learning is similar to the family of approaches such as Explanation-based Learning and Inductive Logic Programming.
+ 
+#### Tree-kernel learning 
+ 
+ is integrated to allow application of SVM learning to sentence-level and paragraph-level linguistic data including discourse. Unlike learning in numerical space, each dimension in tree kernel learning is an occurrence of a particular subtree. Similarity is not a numerical distance but a count of common subtrees. A set of parse trees for individual sentences to represent a paragraph is called
+ [parse thicket](https://en.wikipedia.org/wiki/Parse_Thicket). Its representation as a graph is coded in a tree representation via parentheses, such as [model*.txt](https://github.com/bgalitsky/relevance-based-on-parse-trees/blob/master/src/test/resources/tree_kernel/model_pos_neg_sentiment.txt).
+ To do model building and predictions, C modules are run in [this directory](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/src/test/resources/tree_kernel), so the proper choice needs to be made: {svm_classify.linux, svm_classify.max, svm_classify.exe, svm_learn.*}. Also, proper run permissions need to be set for these files.
+ 
+#### Concept learning 
+ 
+  is a branch of deterministic learning which is applied to attribute-value pairs and possesses useful explainability feature, unlike statistical and deep learning. It is fairly useful for data exploration and visualization since all interesting relations can be visualized. 
+    Concept learning covers inductive and abductive learning and also some cases of deduction. Explore [this package](https://github.com/bgalitsky/relevance-based-on-parse-trees/tree/master/src/main/java/opennlp/tools/fca) for the concept learning-related features.
+
+### Filtering results for Speech Recognition based on semantic meaningfulness
+It takes results from a speech-to-text system and subjects them to
+[filtering](https://github.com/bgalitsky/relevance-based-on-parse-trees/blob/master/src/main/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessor.java). Those recognized candidate words which do not make sense together are filtered out, based on the frequency of co-occurrences found on the web.
+## Related Research
+Here's the link to the book on [question-answering](https://www.amazon.com/Natural-Language-Question-Answering-system/dp/0868039799/ref=sr_1_10?ie=UTF8&qid=1478871097&sr=8-10&keywords=galitsky)
+
+and [research papers](https://scholar.google.com/citations?hl=ru&user=kR_M3HIAAAAJ).
+
+Also the recent [book related to reasoning and linguistics in humans & machines](https://www.amazon.com/Computational-Autism-Human-Computer-Interaction-Galitsky/dp/3319399713)
+
+## Configuring OpenNLP.Similarity component
+
+VerbNet model is included by default, so that the hand-coded meanings of the verb are used when similarity between verb phrases is computed.
+
+To include word2vector model, [download it](https://deeplearning4j.org/) and make sure the following path is valid:
+`resourceDir + "/w2v/GoogleNews-vectors-negative300.bin.gz"`
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/1f97041b/opennlp-similarity/README.txt
----------------------------------------------------------------------
diff --git a/opennlp-similarity/README.txt b/opennlp-similarity/README.txt
new file mode 100644
index 0000000..b535487
--- /dev/null
+++ b/opennlp-similarity/README.txt
@@ -0,0 +1,138 @@
+Apache OpenNLP ${pom.version}
+===============================
+
+
+Building from the Source Distribution
+-------------------------------------
+
+At least Maven 3.0.0 is required for building.
+
+To build everything go into the opennlp directory and run the following command:
+    mvn clean install
+   
+The results of the build will be placed  in:
+    opennlp-distr/target/apache-opennlp-[version]-bin.tar-gz (or .zip)
+
+What is in Similarity component in Apache OpenNLP ${pom.version}
+---------------------------------------
+SIMILARITY COMPONENT of OpenNLP
+
+1. Introduction
+This component does text relevance assessment. It takes two portions of texts (phrases, sentences, paragraphs) and returns a similarity score.
+Similarity component can be used on top of search to improve relevance, computing similarity score between a question and all search results (snippets). 
+Also, this component is useful for web mining of images, videos, forums, blogs, and other media with textual descriptions. Such applications as content generation 
+and filtering meaningless speech recognition results are included in the sample applications of this component.
+   Relevance assessment is based on machine learning of syntactic parse trees (constituency trees, http://en.wikipedia.org/wiki/Parse_tree). 
+The similarity score is calculated as the size of all maximal common sub-trees for sentences from a pair of texts (
+www.aaai.org/ocs/index.php/WS/AAAIW11/paper/download/3971/4187, www.aaai.org/ocs/index.php/FLAIRS/FLAIRS11/paper/download/2573/3018,
+www.aaai.org/ocs/index.php/SSS/SSS10/paper/download/1146/1448).
+   The objective of Similarity component is to give an application engineer a tool for text relevance which can be used as a black box, with no need to understand 
+ computational linguistics or machine learning. 
+ 
+ 2. Installation
+ Please refer to OpenNLP installation instructions
+ 
+ 3. First use case of Similarity component: search
+ 
+ To start with this component, please refer to SearchResultsProcessorTest.java in package opennlp.tools.similarity.apps
+   public void testSearchOrder() runs web search using Bing API and improves search relevance.
+   Look at the code of 
+      public List<HitBase> runSearch(String query) 
+   and then at 
+      private	BingResponse calculateMatchScoreResortHits(BingResponse resp, String searchQuery)
+   which gets search results from Bing and re-ranks them based on computed similarity score.
+ 
+   The main entry to Similarity component is 
+    SentencePairMatchResult matchRes = sm.assessRelevance(snapshot, searchQuery);
+    where we pass the search query and the snapshot and obtain the similarity assessment structure which includes the similarity score.
+   
+   To run this test you need to obtain search API key from Bing at www.bing.com/developers/s/APIBasics.html and specify it in public class BingQueryRunner in
+  protected static final String APP_ID. 
+  
+  4. Solving a unique problem: content generation
+  To demonstrate the usability of Similarity component to tackle a problem which is hard to solve without a linguistic-based technology, 
+  we introduce a content generation component:
+   RelatedSentenceFinder.java
+   
+   The entry point here is the function call
+   hits = f.generateContentAbout("Albert Einstein");
+   which writes a biography of Albert Einstein by finding sentences on the web about various kinds of his activities (such as 'born', 'graduate', 'invented' etc.).
+   The key here is to compute similarity between the seed expression like "Albert Einstein invented relativity theory" and search result like 
+   "Albert Einstein College of Medicine | Medical Education | Biomedical ...
+    www.einstein.yu.edu/Albert Einstein College of Medicine is one of the nation's premier institutions for medical education, ..."
+    and filter out irrelevant search results.
+   
+   This is done in function 
+   public HitBase augmentWithMinedSentencesAndVerifyRelevance(HitBase item, String originalSentence,
+			List<String> sentsAll)
+			
+   	  SentencePairMatchResult matchRes = sm.assessRelevance(pageSentence + " " + title, originalSentence);
+   You can consult the results in gen.txt, where an essay on Einstein bio is written.
+   
+   These are examples of generated articles, given the article title
+     http://www.allvoices.com/contributed-news/9423860/content/81937916-ichie-sings-jazz-blues-contemporary-tunes
+     http://www.allvoices.com/contributed-news/9415063-britney-spears-femme-fatale-in-north-sf-bay-area
+     
+  5. Solving a high-importance problem: filtering out meaningless speech recognition results.
+  Speech recognition SDKs usually produce a number of phrases as results, such as 
+  			 "remember to buy milk tomorrow from trader joes",
+			 "remember to buy milk tomorrow from 3 to jones"
+  One can see that the former is meaningful, and the latter is meaningless (although similar in terms of how it is pronounced).
+  We use web mining and Similarity component to detect a meaningful option (a mistake caused by trying to interpret meaningless 
+  request by a query understanding system such as Siri for iPhone can be costly).
+ 
+  SpeechRecognitionResultsProcessor.java does the job:
+  public List<SentenceMeaningfullnessScore> runSearchAndScoreMeaningfulness(List<String> sents)
+  re-ranks the phrases in the order of decrease of meaningfulness.
+  
+  6. Similarity component internals
+  in the package   opennlp.tools.textsimilarity.chunker2matcher
+  ParserChunker2MatcherProcessor.java does parsing of two portions of text and matching the resultant parse trees to assess similarity between 
+  these portions of text.
+  To run ParserChunker2MatcherProcessor
+     private static String MODEL_DIR = "resources/models";
+  needs to be specified
+  
+  The key function
+  public SentencePairMatchResult assessRelevance(String para1, String para2)
+  takes two portions of text and does similarity assessment by finding the set of all maximum common subtrees 
+  of the set of parse trees for each portion of text
+  
+  It splits paragraphs into sentences, parses them, obtains chunking information and produces grouped phrases (noun, verb, prepositional etc.):
+  public synchronized List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForPara(String para)
+  
+  and then attempts to find common subtrees:
+  in ParseTreeMatcherDeterministic.java
+		List<List<ParseTreeChunk>> res = md.matchTwoSentencesGroupedChunksDeterministic(sent1GrpLst, sent2GrpLst)
+  
+  Phrase matching functionality is in package opennlp.tools.textsimilarity;
+  ParseTreeMatcherDeterministic.java:
+  Here's the key matching function which takes two phrases, aligns them and finds a set of maximum common sub-phrases
+  public List<ParseTreeChunk> generalizeTwoGroupedPhrasesDeterministic
+  
+  7. Package structure
+  	opennlp.tools.similarity.apps : 3 main applications
+	opennlp.tools.similarity.apps.utils: utilities for above applications
+	
+	opennlp.tools.textsimilarity.chunker2matcher: parser which converts text into a form for matching parse trees
+	opennlp.tools.textsimilarity: parse tree matching functionality
+	
+
+
+
+Requirements
+------------
+Java 1.5 is required to run OpenNLP
+Maven 3.0.0 is required for building it
+
+Known OSGi Issues
+------------
+In an OSGi environment the following things are not supported:
+- The coreference resolution component
+- The ability to load a user provided feature generator class
+
+Note
+----
+The current API still contains many deprecated methods; these
+will be removed in one of our next releases, please
+migrate to our new API.

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/1f97041b/opennlp-similarity/lib/edu.mit.jverbnet-1.2.0.jar
----------------------------------------------------------------------
diff --git a/opennlp-similarity/lib/edu.mit.jverbnet-1.2.0.jar b/opennlp-similarity/lib/edu.mit.jverbnet-1.2.0.jar
new file mode 100644
index 0000000..eba3a97
Binary files /dev/null and b/opennlp-similarity/lib/edu.mit.jverbnet-1.2.0.jar differ

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/1f97041b/opennlp-similarity/lib/ejml-0.23.jar
----------------------------------------------------------------------
diff --git a/opennlp-similarity/lib/ejml-0.23.jar b/opennlp-similarity/lib/ejml-0.23.jar
new file mode 100644
index 0000000..60a37df
Binary files /dev/null and b/opennlp-similarity/lib/ejml-0.23.jar differ

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/1f97041b/opennlp-similarity/lib/javax.json.jar
----------------------------------------------------------------------
diff --git a/opennlp-similarity/lib/javax.json.jar b/opennlp-similarity/lib/javax.json.jar
new file mode 100644
index 0000000..09967d8
Binary files /dev/null and b/opennlp-similarity/lib/javax.json.jar differ

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/1f97041b/opennlp-similarity/lib/joda-time.jar
----------------------------------------------------------------------
diff --git a/opennlp-similarity/lib/joda-time.jar b/opennlp-similarity/lib/joda-time.jar
new file mode 100644
index 0000000..b2aca95
Binary files /dev/null and b/opennlp-similarity/lib/joda-time.jar differ

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/1f97041b/opennlp-similarity/lib/jollyday.jar
----------------------------------------------------------------------
diff --git a/opennlp-similarity/lib/jollyday.jar b/opennlp-similarity/lib/jollyday.jar
new file mode 100644
index 0000000..a6bf8b3
Binary files /dev/null and b/opennlp-similarity/lib/jollyday.jar differ

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/1f97041b/opennlp-similarity/lib/xom.jar
----------------------------------------------------------------------
diff --git a/opennlp-similarity/lib/xom.jar b/opennlp-similarity/lib/xom.jar
new file mode 100644
index 0000000..4eb88da
Binary files /dev/null and b/opennlp-similarity/lib/xom.jar differ

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/1f97041b/opennlp-similarity/src/main/java/com.zip
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/com.zip b/opennlp-similarity/src/main/java/com.zip
new file mode 100644
index 0000000..6f96f23
Binary files /dev/null and b/opennlp-similarity/src/main/java/com.zip differ

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/1f97041b/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/DedupResult.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/DedupResult.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/DedupResult.java
new file mode 100644
index 0000000..c2a2c0e
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/DedupResult.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.apps.object_dedup;
+
+public class DedupResult {
+
+	public DedupResult(String string, int i, boolean b) {
+		// TODO: unimplemented stub - all three arguments are currently discarded and no state is stored
+	}
+
+	public Boolean isDecision() {
+		// TODO: unimplemented stub - always returns null, so callers must null-check before unboxing
+		return null;
+	}
+
+}