You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/07/01 15:50:55 UTC

svn commit: r1141938 - in /incubator/opennlp/sandbox/wikinews-importer: ./ samples/ src/ src/main/ src/main/java/ src/main/java/org/ src/main/java/org/apache/ src/main/java/org/apache/opennlp/ src/main/java/org/apache/opennlp/wikinews_importer/

Author: joern
Date: Fri Jul  1 13:50:54 2011
New Revision: 1141938

URL: http://svn.apache.org/viewvc?rev=1141938&view=rev
Log:
OPENNLP-209 Simple tool to upload wikinews xmi files

Added:
    incubator/opennlp/sandbox/wikinews-importer/pom.xml   (with props)
    incubator/opennlp/sandbox/wikinews-importer/samples/
    incubator/opennlp/sandbox/wikinews-importer/samples/FTC_begins_antitrust_inquiry_of_Google.xmi   (with props)
    incubator/opennlp/sandbox/wikinews-importer/samples/Internet_hacking_group_LulzSec_disbands.xmi   (with props)
    incubator/opennlp/sandbox/wikinews-importer/samples/Obama_announces_troop_reductions_in_Afghanistan.xmi   (with props)
    incubator/opennlp/sandbox/wikinews-importer/samples/Ratko_Mladić_said_to_be_too_ill_to_face_trial.xmi   (with props)
    incubator/opennlp/sandbox/wikinews-importer/samples/TypeSystem.xml   (with props)
    incubator/opennlp/sandbox/wikinews-importer/samples/US_actor_Peter_Falk_dies_aged_83.xmi   (with props)
    incubator/opennlp/sandbox/wikinews-importer/src/
    incubator/opennlp/sandbox/wikinews-importer/src/main/
    incubator/opennlp/sandbox/wikinews-importer/src/main/java/
    incubator/opennlp/sandbox/wikinews-importer/src/main/java/org/
    incubator/opennlp/sandbox/wikinews-importer/src/main/java/org/apache/
    incubator/opennlp/sandbox/wikinews-importer/src/main/java/org/apache/opennlp/
    incubator/opennlp/sandbox/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/
    incubator/opennlp/sandbox/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/WikinewsImporter.java   (with props)
Modified:
    incubator/opennlp/sandbox/wikinews-importer/   (props changed)

Propchange: incubator/opennlp/sandbox/wikinews-importer/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Fri Jul  1 13:50:54 2011
@@ -0,0 +1,7 @@
+target
+
+.project
+
+.settings
+
+.classpath

Added: incubator/opennlp/sandbox/wikinews-importer/pom.xml
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/wikinews-importer/pom.xml?rev=1141938&view=auto
==============================================================================
--- incubator/opennlp/sandbox/wikinews-importer/pom.xml (added)
+++ incubator/opennlp/sandbox/wikinews-importer/pom.xml Fri Jul  1 13:50:54 2011
@@ -0,0 +1,92 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.    
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+
+	<parent>
+		<groupId>org.apache</groupId>
+		<artifactId>apache</artifactId>
+		<version>9</version>
+		<relativePath />
+	</parent>
+
+	<groupId>org.apache.opennlp</groupId>
+	<artifactId>wikinews-importer</artifactId>
+	<version>0.0.1-incubating-SNAPSHOT</version>
+	<packaging>war</packaging>
+
+	<name>OpenNLP Wikinews Importer</name>
+
+	<prerequisites>
+		<maven>3.0</maven>
+	</prerequisites>
+
+	<repositories>
+		<repository>
+		    <id>maven2-repository.java.net</id>
+		    <name>Java.net Repository for Maven</name>
+		    <url>http://download.java.net/maven/2/</url>
+		    <layout>default</layout>
+		</repository> 
+		<repository>
+		    <id>maven-repository.java.net</id>
+		    <name>Java.net Maven 1 Repository (legacy)</name>
+		    <url>http://download.java.net/maven/1</url>
+		    <layout>legacy</layout>
+		</repository>
+	</repositories>
+	
+	<dependencies>
+		<dependency>
+		    <groupId>com.sun.jersey</groupId>
+		    <artifactId>jersey-json</artifactId>
+		    <version>1.8</version>
+		</dependency>
+
+		<dependency>
+		    <groupId>com.sun.jersey</groupId>
+		    <artifactId>jersey-client</artifactId>
+		    <version>1.8</version>
+		</dependency>
+
+		<dependency>
+			<groupId>junit</groupId>
+			<artifactId>junit</artifactId>
+			<version>4.8.1</version>
+			<scope>test</scope>
+		</dependency>
+	</dependencies>
+	
+	<build>
+		<plugins>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-compiler-plugin</artifactId>
+				<configuration>
+					<source>1.6</source>
+					<target>1.6</target>
+          			<compilerArgument>-Xlint</compilerArgument>
+				</configuration>
+			</plugin>
+		</plugins>
+	</build>
+</project>
\ No newline at end of file

Propchange: incubator/opennlp/sandbox/wikinews-importer/pom.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/opennlp/sandbox/wikinews-importer/samples/FTC_begins_antitrust_inquiry_of_Google.xmi
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/wikinews-importer/samples/FTC_begins_antitrust_inquiry_of_Google.xmi?rev=1141938&view=auto
==============================================================================
--- incubator/opennlp/sandbox/wikinews-importer/samples/FTC_begins_antitrust_inquiry_of_Google.xmi (added)
+++ incubator/opennlp/sandbox/wikinews-importer/samples/FTC_begins_antitrust_inquiry_of_Google.xmi Fri Jul  1 13:50:54 2011
@@ -0,0 +1,77 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<xmi:XMI xmlns:cas="http:///uima/cas.ecore" xmlns:annotations="http:///org/apache/opennlp/annotations.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="FTC begins antitrust inquiry of Google&#10;&#10;Google has confirmed that it has &quot;received formal notification,&quot; that the&#10;Federal Trade Commission (FTC) is investigating its business practices.&#10;The acknowledgment was posted on the internet search engine company's blog&#10;Friday. Google said it was unclear about the nature of the probe.&#10;&#10;A broad FTC investigation would cause the company be tied up in defending&#10;itself for years, and according to Bloomberg, might well be the government’s &#10;biggest antitrust case since the Microsoft probe.&#10;&#10;The FTC will likely launch an anti-trust investigation, as the company has &#10;been the target of many past smaller ones, according to CNN.&#10;&#10;Mercury News reported Friday morning that the FTC review will probably focus&#10;on whether Google is using its dominance in the internet search business to&#10;promote its own products and services. Google's rivals say Google unfairly&#10;profits from its monopoly in the search engine business by using its search&#10;services to point users of its internet search engine to its own sites and&#10;services while hiding links to its competitors.&#10;&#10;The FCC probe is expected to determine if Google abuses it market dominance&#10;to promote moneymaking online marketing, such as its mapping, comparison&#10;shopping and travel services. According to CNN, &quot;Google dominates search&#10;in the United States, controlling about two-thirds of the market, according&#10;to comScore. It also licenses the world's largest smartphone operating system,&#10;and its share of U.S. display advertising revenue recently eclipsed&#10;long-time leader Yahoo.&quot;&#10;&#10;Google's response is that most complaints of anticompetitive behavior come&#10;from companies who are displeased with their ranking in Google's search engine.&#10;However, Joshua Wright, law professor at George Mason University said the&#10;FTC will be looking at possible harm to users and not complaints&#10;by Google’s competitors.&#10;&#10;Melissa Maxman, co-chair of a Washington-based, antitrust practice&#10;group, said the FTC's Bureau of Consumer Protection unit will almost&#10;certainly be involved to see if Google deceives search engine users&#10;by screening search results to profit its own services. She said she&#10;would be &quot;shocked&quot; if the consumer protection unit was not involved.&#10;&#10;Google Fellow Amit Singhal said in the post, &quot;We respect the FTC's&#10;process and will be working with them (as we have with other agencies)&#10;over the coming months to answer questions about Google and our services.&quot;&#10;But Singhal acknowledged, &quot;It's still unclear exactly what the FTC's&#10;concerns are, but we're clear about where we stand. Since the beginning,&#10;we have been guided by the idea that, if we focus on the user,&#10;all else will follow.&quot;&#10;&#10;Bloomberg reported Friday: &quot;Google has set aside $500 million for a&#10;U.S. government investigation into online pharmacy ads the company&#10;accepted that may have violated the law.&quot;&#10;&#10;Google's efforts to improve privacy policies after last years'&#10;determination that its social-networking service Google Buzz used&#10;deceptive tactics, are currently being overseen by the FTC."/>
+    <tcas:DocumentAnnotation xmi:id="8" sofa="1" begin="0" end="3039" language="x-unspecified"/>
+    <annotations:Headline xmi:id="13" sofa="1" begin="0" end="38"/>
+    <annotations:Sentence xmi:id="17" sofa="1" begin="40" end="185"/>
+    <annotations:Sentence xmi:id="21" sofa="1" begin="186" end="326"/>
+    <annotations:Sentence xmi:id="25" sofa="1" begin="328" end="529"/>
+    <annotations:Sentence xmi:id="29" sofa="1" begin="531" end="667"/>
+    <annotations:Sentence xmi:id="33" sofa="1" begin="861" end="1095"/>
+    <annotations:Sentence xmi:id="37" sofa="1" begin="669" end="860"/>
+    <annotations:Sentence xmi:id="41" sofa="1" begin="2851" end="3039"/>
+    <annotations:Sentence xmi:id="45" sofa="1" begin="2673" end="2849"/>
+    <annotations:Sentence xmi:id="49" sofa="1" begin="2231" end="2443"/>
+    <annotations:Sentence xmi:id="53" sofa="1" begin="2444" end="2671"/>
+    <annotations:Sentence xmi:id="57" sofa="1" begin="2148" end="2229"/>
+    <annotations:Sentence xmi:id="61" sofa="1" begin="1888" end="2147"/>
+    <annotations:Sentence xmi:id="65" sofa="1" begin="1568" end="1722"/>
+    <annotations:Sentence xmi:id="69" sofa="1" begin="1723" end="1886"/>
+    <annotations:Sentence xmi:id="73" sofa="1" begin="1097" end="1275"/>
+    <annotations:Sentence xmi:id="77" sofa="1" begin="1276" end="1407"/>
+    <annotations:Sentence xmi:id="81" sofa="1" begin="1408" end="1566"/>
+    <annotations:Organization xmi:id="85" sofa="1" begin="0" end="3"/>
+    <annotations:Organization xmi:id="89" sofa="1" begin="32" end="38"/>
+    <annotations:Organization xmi:id="93" sofa="1" begin="40" end="46"/>
+    <annotations:Organization xmi:id="97" sofa="1" begin="114" end="144"/>
+    <annotations:Organization xmi:id="101" sofa="1" begin="269" end="275"/>
+    <annotations:Organization xmi:id="105" sofa="1" begin="336" end="339"/>
+    <annotations:Organization xmi:id="109" sofa="1" begin="437" end="446"/>
+    <annotations:Organization xmi:id="113" sofa="1" begin="513" end="522"/>
+    <annotations:Organization xmi:id="117" sofa="1" begin="535" end="538"/>
+    <annotations:Organization xmi:id="121" sofa="1" begin="663" end="666"/>
+    <annotations:Organization xmi:id="125" sofa="1" begin="669" end="681"/>
+    <annotations:Organization xmi:id="129" sofa="1" begin="715" end="718"/>
+    <annotations:Organization xmi:id="133" sofa="1" begin="757" end="763"/>
+    <annotations:Organization xmi:id="137" sofa="1" begin="861" end="867"/>
+    <annotations:Organization xmi:id="141" sofa="1" begin="881" end="887"/>
+    <annotations:Organization xmi:id="145" sofa="1" begin="1101" end="1104"/>
+    <annotations:Organization xmi:id="149" sofa="1" begin="1139" end="1145"/>
+    <annotations:Organization xmi:id="153" sofa="1" begin="1289" end="1292"/>
+    <annotations:Organization xmi:id="157" sofa="1" begin="1295" end="1301"/>
+    <annotations:Organization xmi:id="161" sofa="1" begin="1398" end="1406"/>
+    <annotations:Organization xmi:id="165" sofa="1" begin="1559" end="1564"/>
+    <annotations:Organization xmi:id="169" sofa="1" begin="1568" end="1574"/>
+    <annotations:Organization xmi:id="173" sofa="1" begin="1699" end="1705"/>
+    <annotations:Organization xmi:id="177" sofa="1" begin="1764" end="1787"/>
+    <annotations:Organization xmi:id="181" sofa="1" begin="1797" end="1800"/>
+    <annotations:Organization xmi:id="185" sofa="1" begin="1865" end="1871"/>
+    <annotations:Organization xmi:id="189" sofa="1" begin="1971" end="1974"/>
+    <annotations:Organization xmi:id="193" sofa="1" begin="2056" end="2062"/>
+    <annotations:Organization xmi:id="197" sofa="1" begin="2231" end="2237"/>
+    <annotations:Organization xmi:id="201" sofa="1" begin="2292" end="2295"/>
+    <annotations:Organization xmi:id="205" sofa="1" begin="2418" end="2424"/>
+    <annotations:Organization xmi:id="209" sofa="1" begin="2507" end="2510"/>
+    <annotations:Organization xmi:id="213" sofa="1" begin="2673" end="2682"/>
+    <annotations:Organization xmi:id="217" sofa="1" begin="2701" end="2707"/>
+    <annotations:Organization xmi:id="221" sofa="1" begin="2851" end="2857"/>
+    <annotations:Organization xmi:id="225" sofa="1" begin="3035" end="3038"/>
+    <annotations:Organization xmi:id="229" sofa="1" begin="2963" end="2969"/>
+    <annotations:Person xmi:id="233" sofa="1" begin="1732" end="1745"/>
+    <annotations:Person xmi:id="237" sofa="1" begin="1888" end="1902"/>
+    <annotations:Person xmi:id="241" sofa="1" begin="2245" end="2257"/>
+    <annotations:Person xmi:id="245" sofa="1" begin="2448" end="2455"/>
+    <annotations:Paragraph xmi:id="249" sofa="1" begin="40" end="326"/>
+    <annotations:Paragraph xmi:id="253" sofa="1" begin="328" end="529"/>
+    <annotations:Paragraph xmi:id="257" sofa="1" begin="531" end="667"/>
+    <annotations:Paragraph xmi:id="261" sofa="1" begin="669" end="1095"/>
+    <annotations:Paragraph xmi:id="265" sofa="1" begin="1097" end="1566"/>
+    <annotations:Paragraph xmi:id="269" sofa="1" begin="1568" end="1886"/>
+    <annotations:Paragraph xmi:id="273" sofa="1" begin="1888" end="2229"/>
+    <annotations:Paragraph xmi:id="277" sofa="1" begin="2231" end="2671"/>
+    <annotations:Paragraph xmi:id="281" sofa="1" begin="2673" end="2849"/>
+    <annotations:Paragraph xmi:id="285" sofa="1" begin="2851" end="3039"/>
+    <cas:View sofa="1" members="8 13 17 21 25 29 33 37 41 45 49 53 57 61 65 69 73 77 81 85 89 93 97 101 105 109 113 117 121 125 129 133 137 141 145 149 153 157 161 165 169 173 177 181 185 189 193 197 201 205 209 213 217 221 225 229 233 237 241 245 249 253 257 261 265 269 273 277 281 285"/>
+</xmi:XMI>

Propchange: incubator/opennlp/sandbox/wikinews-importer/samples/FTC_begins_antitrust_inquiry_of_Google.xmi
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/opennlp/sandbox/wikinews-importer/samples/Internet_hacking_group_LulzSec_disbands.xmi
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/wikinews-importer/samples/Internet_hacking_group_LulzSec_disbands.xmi?rev=1141938&view=auto
==============================================================================
--- incubator/opennlp/sandbox/wikinews-importer/samples/Internet_hacking_group_LulzSec_disbands.xmi (added)
+++ incubator/opennlp/sandbox/wikinews-importer/samples/Internet_hacking_group_LulzSec_disbands.xmi Fri Jul  1 13:50:54 2011
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xmi:XMI xmlns:cas="http:///uima/cas.ecore" xmlns:annotations="http:///org/apache/opennlp/annotations.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="Internet hacking group LulzSec disbands&#10;&#10;The computer hacking organization Lulz Security disbanded yesterday, said the&#10;group in a statement. Released via Pastebin, it states &quot;[o]ur planned 50 day&#10;cruise has expired, and we must now sail into the distance.&quot;&#10;&#10;The announcement comes a day after The Guardian released leaked IRC logs of&#10;private conversations between LulzSec members and days after the arrest of a&#10;British teenager in connection to the group.&#10;&#10;With the announcement the group released previously unseen private information.&#10;Included was AT&amp;T and AOL internal data, Battlefield Heros Beta user account data,&#10;and user information from the NATO bookstore. In total over 810,000 people's user&#10;information was present in this final release.&#10;&#10;LulzSec became known after they allegedly hacked the Play Station Network,&#10;rendering the service unusable. The group allegedly proceeded to hack US federal&#10;government websites such as those of the FBI and CIA. One of their last hacks&#10;targeted the Arizona Department of Public Safety in which they allegedly copied&#10;hundreds of confidential documents, some of which were marked&#10;&quot;confidential/law enforcement sensitive.&quot;&#10;&#10;While LulzSec has disbanded, copycat organizations including LulzSec Brazil&#10;and LulzSec Italy are still operational.&#10;"/>
+    <tcas:DocumentAnnotation xmi:id="8" sofa="1" begin="0" end="1286" language="x-unspecified"/>
+    <annotations:Headline xmi:id="13" sofa="1" begin="0" end="39"/>
+    <annotations:Paragraph xmi:id="17" sofa="1" begin="41" end="256"/>
+    <annotations:Paragraph xmi:id="21" sofa="1" begin="258" end="455"/>
+    <annotations:Paragraph xmi:id="25" sofa="1" begin="457" end="748"/>
+    <annotations:Paragraph xmi:id="29" sofa="1" begin="750" end="1167"/>
+    <annotations:Paragraph xmi:id="33" sofa="1" begin="1169" end="1285"/>
+    <annotations:Sentence xmi:id="37" sofa="1" begin="141" end="256"/>
+    <annotations:Sentence xmi:id="41" sofa="1" begin="41" end="140"/>
+    <annotations:Sentence xmi:id="45" sofa="1" begin="258" end="455"/>
+    <annotations:Sentence xmi:id="49" sofa="1" begin="666" end="748"/>
+    <annotations:Sentence xmi:id="53" sofa="1" begin="537" end="665"/>
+    <annotations:Sentence xmi:id="57" sofa="1" begin="457" end="536"/>
+    <annotations:Sentence xmi:id="61" sofa="1" begin="750" end="856"/>
+    <annotations:Sentence xmi:id="65" sofa="1" begin="857" end="959"/>
+    <annotations:Sentence xmi:id="69" sofa="1" begin="960" end="1167"/>
+    <annotations:Sentence xmi:id="73" sofa="1" begin="1169" end="1285"/>
+    <cas:View sofa="1" members="8 13 17 21 25 29 33 37 41 45 49 53 57 61 65 69 73"/>
+</xmi:XMI>

Propchange: incubator/opennlp/sandbox/wikinews-importer/samples/Internet_hacking_group_LulzSec_disbands.xmi
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/opennlp/sandbox/wikinews-importer/samples/Obama_announces_troop_reductions_in_Afghanistan.xmi
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/wikinews-importer/samples/Obama_announces_troop_reductions_in_Afghanistan.xmi?rev=1141938&view=auto
==============================================================================
--- incubator/opennlp/sandbox/wikinews-importer/samples/Obama_announces_troop_reductions_in_Afghanistan.xmi (added)
+++ incubator/opennlp/sandbox/wikinews-importer/samples/Obama_announces_troop_reductions_in_Afghanistan.xmi Fri Jul  1 13:50:54 2011
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xmi:XMI xmlns:cas="http:///uima/cas.ecore" xmlns:annotations="http:///org/apache/opennlp/annotations.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="Obama announces troop reductions in Afghanistan&#10;&#10;A third of U.S. forces in Afghanistan are to be withdrawn&#10;from the country by the end of next year, president Barack Obama&#10;has announced. In a televised statement on Wednesday evening,&#10;Obama announced 33,000 soldiers would be withdrawn from the country&#10;by the summer of next year, and declared the U.S. had beaten&#10;al-Qaeda and the Taliban.&#10;&#10;During the speech he announced the U.S. was &quot;meeting our goals&quot;&#10;to reverse the momentum of the Taliban and train the Afghan National Army,&#10;and said al-Qaeda was &quot;on a path to defeat&quot; after the death of&#10;Osama bin Laden earlier this year. Obama said that it was &quot;time&#10;to focus on nation-building here at home,&quot; and that the reduction&#10;in troops marked &quot;the beginning—but not the end—of our effort to&#10;wind down this war.&quot;&#10;&#10;U.S. civilian leaders from both the Democratic and Republican&#10;parties have voiced support for a rapid drawdown of troops—27 senators&#10;from both parties sent a letter to the president last week advocating&#10;&quot;a sizable and sustained reduction in forces.&quot; However, politicians&#10;are also angry at the huge cost of the war—currently over $2bn&#10;every week—and the U.S. public is tired of the war which has gone&#10;on ten years, leaving thousands of Americans dead.&#10;&#10;However, NATO officials are worried the president is making a&#10;big gamble with such a large scale withdrawal, and have warned&#10;the U.S. could take substantial losses in the country as &quot;fighting season&quot;&#10;begins into the summer.&#10;&#10;David Petraeus, the general who commands all U.S. forces in Afghanistan,&#10;urged a smaller withdrawal. He has reportedly refused to endorse&#10;the decision by Obama.&#10;&#10;Analysts say any setbacks in the country this year will leave&#10;Obama exposed to allegations he was too quick in his decision&#10;and was too politically motivated.&#10;"/>
+    <tcas:DocumentAnnotation xmi:id="8" sofa="1" begin="0" end="1807" language="x-unspecified"/>
+    <annotations:Headline xmi:id="13" sofa="1" begin="0" end="47"/>
+    <annotations:Paragraph xmi:id="17" sofa="1" begin="49" end="388"/>
+    <annotations:Paragraph xmi:id="21" sofa="1" begin="390" end="807"/>
+    <annotations:Paragraph xmi:id="25" sofa="1" begin="809" end="1259"/>
+    <annotations:Paragraph xmi:id="29" sofa="1" begin="1261" end="1484"/>
+    <annotations:Paragraph xmi:id="33" sofa="1" begin="1486" end="1646"/>
+    <annotations:Paragraph xmi:id="37" sofa="1" begin="1648" end="1806"/>
+    <annotations:Sentence xmi:id="41" sofa="1" begin="49" end="186"/>
+    <annotations:Sentence xmi:id="45" sofa="1" begin="187" end="388"/>
+    <annotations:Sentence xmi:id="49" sofa="1" begin="390" end="626"/>
+    <annotations:Sentence xmi:id="53" sofa="1" begin="627" end="807"/>
+    <annotations:Sentence xmi:id="57" sofa="1" begin="809" end="1058"/>
+    <annotations:Sentence xmi:id="61" sofa="1" begin="1059" end="1259"/>
+    <annotations:Sentence xmi:id="65" sofa="1" begin="1261" end="1484"/>
+    <annotations:Sentence xmi:id="69" sofa="1" begin="1587" end="1646"/>
+    <annotations:Sentence xmi:id="73" sofa="1" begin="1486" end="1586"/>
+    <annotations:Sentence xmi:id="77" sofa="1" begin="1648" end="1806"/>
+    <cas:View sofa="1" members="8 13 17 21 25 29 33 37 41 45 49 53 57 61 65 69 73 77"/>
+</xmi:XMI>

Propchange: incubator/opennlp/sandbox/wikinews-importer/samples/Obama_announces_troop_reductions_in_Afghanistan.xmi
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/opennlp/sandbox/wikinews-importer/samples/Ratko_Mladić_said_to_be_too_ill_to_face_trial.xmi
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/wikinews-importer/samples/Ratko_Mladi%C4%87_said_to_be_too_ill_to_face_trial.xmi?rev=1141938&view=auto
==============================================================================
--- incubator/opennlp/sandbox/wikinews-importer/samples/Ratko_Mladić_said_to_be_too_ill_to_face_trial.xmi (added)
+++ incubator/opennlp/sandbox/wikinews-importer/samples/Ratko_Mladić_said_to_be_too_ill_to_face_trial.xmi Fri Jul  1 13:50:54 2011
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xmi:XMI xmlns:cas="http:///uima/cas.ecore" xmlns:annotations="http:///org/apache/opennlp/annotations.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="Ratko Mladić said to be too ill to face trial&#10;&#10;Ratko Mladić, a former Bosnian Serb general, is allegedly too ill to&#10;face trial for war crimes. According to his lawyer, 69-year-old Mladić&#10;will not survive to see the start of proceedings.&#10;&#10;Concerns for the health of Mladić come despite a Serbian judge having&#10;ruled him fit for extradition to the UN war crimes tribunal in The Hague&#10;only last Friday, a ruling which his lawyer seeks to appeal.&#10;&#10;“It was impossible to have a coherent conversation with him or to talk&#10;of his defence,” said lawyer Milos Saljic, after meeting with Mladić on Sunday.&#10;&#10;Serbian deputy war crimes prosecutor Bruno Vekaric believes Mladić is&#10;using his illness in an attempt to delay his extradition.&#10;&#10;“He's a man who has not taken care of his health for a while, but not&#10;to the point that he cannot stand trial,&quot; says Vekaric. &quot;According to&#10;doctors, he doesn't need hospitalisation.&quot;&#10;&#10;Chief prosecutor Vladimir Vukcevic echoed these sentiments, stating&#10;that Mladić was both well enough to make the two-hour flight to the&#10;Netherlands and is conscious of the charges against him, despite claims&#10;from family members that the former general is not lucid.&#10;&#10;Mladić’s son, Darko Mladić, is calling for his father’s health to be&#10;reviewed by independent experts. Mladić has reportedly suffered three&#10;strokes, the last in 2008, resulting in the partial paralysis of his&#10;right side. His family has expressed concerns he will not receive&#10;adequate treatment in The Hague.&#10;&#10;Mladić is being indicted for crimes against humanity, war crimes&#10;and genocide, notably the Srebrenica massacre during the Bosnian war.&#10;"/>
+    <tcas:DocumentAnnotation xmi:id="8" sofa="1" begin="0" end="1618" language="x-unspecified"/>
+    <annotations:Headline xmi:id="13" sofa="1" begin="0" end="45"/>
+    <annotations:Paragraph xmi:id="17" sofa="1" begin="47" end="236"/>
+    <annotations:Paragraph xmi:id="21" sofa="1" begin="238" end="441"/>
+    <annotations:Paragraph xmi:id="25" sofa="1" begin="443" end="593"/>
+    <annotations:Paragraph xmi:id="29" sofa="1" begin="595" end="722"/>
+    <annotations:Paragraph xmi:id="33" sofa="1" begin="724" end="906"/>
+    <annotations:Paragraph xmi:id="37" sofa="1" begin="908" end="1173"/>
+    <annotations:Paragraph xmi:id="45" sofa="1" begin="1483" end="1617"/>
+    <annotations:Paragraph xmi:id="101" sofa="1" begin="1175" end="1481"/>
+    <annotations:Sentence xmi:id="49" sofa="1" begin="143" end="236"/>
+    <annotations:Sentence xmi:id="53" sofa="1" begin="47" end="142"/>
+    <annotations:Sentence xmi:id="57" sofa="1" begin="238" end="441"/>
+    <annotations:Sentence xmi:id="61" sofa="1" begin="443" end="593"/>
+    <annotations:Sentence xmi:id="65" sofa="1" begin="595" end="722"/>
+    <annotations:Sentence xmi:id="69" sofa="1" begin="850" end="906"/>
+    <annotations:Sentence xmi:id="73" sofa="1" begin="724" end="849"/>
+    <annotations:Sentence xmi:id="77" sofa="1" begin="908" end="1173"/>
+    <annotations:Sentence xmi:id="81" sofa="1" begin="1395" end="1481"/>
+    <annotations:Sentence xmi:id="85" sofa="1" begin="1277" end="1394"/>
+    <annotations:Sentence xmi:id="89" sofa="1" begin="1175" end="1276"/>
+    <annotations:Sentence xmi:id="93" sofa="1" begin="1483" end="1617"/>
+    <cas:View sofa="1" members="8 13 17 21 25 29 33 37 45 101 49 53 57 61 65 69 73 77 81 85 89 93"/>
+</xmi:XMI>

Propchange: incubator/opennlp/sandbox/wikinews-importer/samples/Ratko_Mladić_said_to_be_too_ill_to_face_trial.xmi
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/opennlp/sandbox/wikinews-importer/samples/TypeSystem.xml
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/wikinews-importer/samples/TypeSystem.xml?rev=1141938&view=auto
==============================================================================
--- incubator/opennlp/sandbox/wikinews-importer/samples/TypeSystem.xml (added)
+++ incubator/opennlp/sandbox/wikinews-importer/samples/TypeSystem.xml Fri Jul  1 13:50:54 2011
@@ -0,0 +1,65 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+
+	<!--
+	 ***************************************************************
+	 * Licensed to the Apache Software Foundation (ASF) under one
+	 * or more contributor license agreements.  See the NOTICE file
+	 * distributed with this work for additional information
+	 * regarding copyright ownership.  The ASF licenses this file
+	 * to you under the Apache License, Version 2.0 (the
+	 * "License"); you may not use this file except in compliance
+	 * with the License.  You may obtain a copy of the License at
+         *
+	 *   http://www.apache.org/licenses/LICENSE-2.0
+	 * 
+	 * Unless required by applicable law or agreed to in writing,
+	 * software distributed under the License is distributed on an
+	 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+	 * KIND, either express or implied.  See the License for the
+	 * specific language governing permissions and limitations
+	 * under the License.
+	 ***************************************************************
+   -->
+   
+<typeSystemDescription  xmlns="http://uima.apache.org/resourceSpecifier">
+	<name>WikinewsTypeSystem</name>
+	<description>Wikinews Sample Type System Definition</description>
+	<vendor>The Apache Software Foundation</vendor>
+	<version>1.0</version>
+	<types>
+		<typeDescription>
+			<name>org.apache.opennlp.annotations.Headline</name>
+			<description></description>
+			<supertypeName>uima.tcas.Annotation</supertypeName>
+		</typeDescription>
+		
+		<typeDescription>
+			<name>org.apache.opennlp.annotations.Paragraph</name>
+			<description></description>
+			<supertypeName>uima.tcas.Annotation</supertypeName>
+		</typeDescription>
+		
+		<typeDescription>
+			<name>org.apache.opennlp.annotations.Sentence</name>
+			<description></description>
+			<supertypeName>uima.tcas.Annotation</supertypeName>
+		</typeDescription>
+		
+		<typeDescription>
+			<name>org.apache.opennlp.annotations.Token</name>
+			<description></description>
+			<supertypeName>uima.tcas.Annotation</supertypeName>
+		</typeDescription>
+		
+		<typeDescription>
+			<name>org.apache.opennlp.annotations.Person</name>
+			<description></description>
+			<supertypeName>uima.tcas.Annotation</supertypeName>
+		</typeDescription>
+		<typeDescription>
+			<name>org.apache.opennlp.annotations.Organization</name>
+			<description></description>
+			<supertypeName>uima.tcas.Annotation</supertypeName>
+		</typeDescription>
+	</types>
+</typeSystemDescription>
\ No newline at end of file

Propchange: incubator/opennlp/sandbox/wikinews-importer/samples/TypeSystem.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/opennlp/sandbox/wikinews-importer/samples/US_actor_Peter_Falk_dies_aged_83.xmi
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/wikinews-importer/samples/US_actor_Peter_Falk_dies_aged_83.xmi?rev=1141938&view=auto
==============================================================================
--- incubator/opennlp/sandbox/wikinews-importer/samples/US_actor_Peter_Falk_dies_aged_83.xmi (added)
+++ incubator/opennlp/sandbox/wikinews-importer/samples/US_actor_Peter_Falk_dies_aged_83.xmi Fri Jul  1 13:50:54 2011
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xmi:XMI xmlns:cas="http:///uima/cas.ecore" xmlns:annotations="http:///org/apache/opennlp/annotations.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="US actor Peter Falk dies aged 83&#10;&#10;US actor Peter Falk died on Thursday at his home in Beverly Hills, California&#10;after a struggle with Alzheimer's disease. Falk may be most known for his&#10;role as Detective Columbo in the television series of the same name that ran&#10;on NBC from 1968 to 1978, which moved to ABC in 1989. The last episode aired&#10;in 2003. His portrayal of the character won him four Emmy Awards.&#10;&#10;Falk, born in 1927, had his right eye surgically removed at the age of&#10;three, due to a cancerous growth. He wore a glass eye for most of his life.&#10;Because of this, he was rejected by the armed services at the end of World War II.&#10;&#10;Falk starred in his first crime film, Murder, Inc. in 1960, for which&#10;he was nominated for an Academy Award. In Blake Edwards's 1965 film The&#10;Great Race, he appeared with Jack Lemmon, Tony Curtis, and Natalie Wood.&#10;&#10;Falk is survived by his wife and two daughters.&#10;"/>
+    <tcas:DocumentAnnotation xmi:id="8" sofa="1" begin="0" end="902" language="x-unspecified"/>
+    <annotations:Headline xmi:id="13" sofa="1" begin="0" end="32"/>
+    <annotations:Paragraph xmi:id="17" sofa="1" begin="34" end="405"/>
+    <annotations:Paragraph xmi:id="21" sofa="1" begin="407" end="636"/>
+    <annotations:Paragraph xmi:id="25" sofa="1" begin="638" end="852"/>
+    <annotations:Paragraph xmi:id="29" sofa="1" begin="854" end="901"/>
+    <annotations:Sentence xmi:id="33" sofa="1" begin="554" end="636"/>
+    <annotations:Sentence xmi:id="37" sofa="1" begin="34" end="154"/>
+    <annotations:Sentence xmi:id="41" sofa="1" begin="155" end="316"/>
+    <annotations:Sentence xmi:id="45" sofa="1" begin="317" end="348"/>
+    <annotations:Sentence xmi:id="49" sofa="1" begin="349" end="405"/>
+    <annotations:Sentence xmi:id="53" sofa="1" begin="407" end="511"/>
+    <annotations:Sentence xmi:id="57" sofa="1" begin="512" end="553"/>
+    <annotations:Sentence xmi:id="61" sofa="1" begin="638" end="746"/>
+    <annotations:Sentence xmi:id="65" sofa="1" begin="747" end="852"/>
+    <annotations:Sentence xmi:id="69" sofa="1" begin="854" end="901"/>
+    <cas:View sofa="1" members="8 13 17 21 25 29 33 37 41 45 49 53 57 61 65 69"/>
+</xmi:XMI>

Propchange: incubator/opennlp/sandbox/wikinews-importer/samples/US_actor_Peter_Falk_dies_aged_83.xmi
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/opennlp/sandbox/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/WikinewsImporter.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/WikinewsImporter.java?rev=1141938&view=auto
==============================================================================
--- incubator/opennlp/sandbox/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/WikinewsImporter.java (added)
+++ incubator/opennlp/sandbox/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/WikinewsImporter.java Fri Jul  1 13:50:54 2011
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.opennlp.wikinews_importer;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+
+import javax.ws.rs.core.MediaType;
+
+import com.sun.jersey.api.client.Client;
+import com.sun.jersey.api.client.ClientResponse;
+import com.sun.jersey.api.client.WebResource;
+
+/**
+ * Simple command line tool which uploads one XMI file to a web resource.
+ * <p>
+ * Usage: <code>WikinewsImporter address xmiFile</code>
+ */
+public class WikinewsImporter {
+
+	/**
+	 * Reads the given XMI file into memory and POSTs its bytes as
+	 * <code>text/xml</code> to the resource at <code>address</code> extended by
+	 * the name of the file. Afterwards the file name and the HTTP status code
+	 * of the response are printed to standard out.
+	 *
+	 * @param args exactly two arguments: the address of the target resource
+	 *     and the path of the XMI file to upload; otherwise a usage line is
+	 *     printed and the VM exits with status -1
+	 *
+	 * @throws Exception if the file cannot be read or the request fails
+	 */
+	public static void main(String[] args) throws Exception {
+		
+		if (args.length != 2) {
+			System.out.println("WikinewsImporter address xmiFile");
+			System.exit(-1);
+		}
+		
+		Client c = Client.create();
+		
+		WebResource r = c.resource(args[0]);
+	
+		// read file into bytes
+		File xmiFile = new File(args[1]);
+		ByteArrayOutputStream xmiBytes = new ByteArrayOutputStream((int) xmiFile.length());
+		
+		InputStream xmiIn = new FileInputStream(xmiFile);
+		
+		try {
+			byte buffer[] = new byte[1024];
+			int length;
+			// read() reports the end of the stream with -1
+			while ((length = xmiIn.read(buffer)) != -1) {
+				xmiBytes.write(buffer, 0, length);
+			}
+		}
+		finally {
+			// release the file handle even when reading fails
+			xmiIn.close();
+		}
+		
+		ClientResponse response = r
+				.path(xmiFile.getName())
+				.accept(MediaType.TEXT_XML)
+				// accept(...) only sets the Accept header, which describes the
+				// response the client is willing to receive. The type of the
+				// posted body is a separate header and must be set explicitly.
+				.header("Content-Type", MediaType.TEXT_XML)
+				.post(ClientResponse.class, xmiBytes.toByteArray());
+		
+		System.out.println(xmiFile.getName() + " " + response.getStatus());
+	}
+}

Propchange: incubator/opennlp/sandbox/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/WikinewsImporter.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain