You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2011/07/04 21:12:33 UTC
svn commit: r1142766 - in /incubator/opennlp/trunk/opennlp-tools/lang/pt: ./
tokenizer/ tokenizer/pt-detokenizer.xml
Author: colen
Date: Mon Jul 4 19:12:33 2011
New Revision: 1142766
URL: http://svn.apache.org/viewvc?rev=1142766&view=rev
Log:
OPENNLP-212 Added Portuguese detokenizer based on the English one with few modifications.
Added:
incubator/opennlp/trunk/opennlp-tools/lang/pt/
incubator/opennlp/trunk/opennlp-tools/lang/pt/tokenizer/
incubator/opennlp/trunk/opennlp-tools/lang/pt/tokenizer/pt-detokenizer.xml (with props)
Added: incubator/opennlp/trunk/opennlp-tools/lang/pt/tokenizer/pt-detokenizer.xml
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/lang/pt/tokenizer/pt-detokenizer.xml?rev=1142766&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/lang/pt/tokenizer/pt-detokenizer.xml (added)
+++ incubator/opennlp/trunk/opennlp-tools/lang/pt/tokenizer/pt-detokenizer.xml Mon Jul 4 19:12:33 2011
@@ -0,0 +1,92 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<dictionary> <!-- Portuguese detokenizer rules: each entry pairs one token with the operation named in its attribute -->
+ <entry operation="RIGHT_LEFT_MATCHING"> <!-- straight double quote; NOTE(review): presumably attaches alternately to the right (opening) and to the left (closing), confirm against the OpenNLP detokenizer docs -->
+ <token>"</token>
+ </entry>
+ <entry operation="RIGHT_LEFT_MATCHING"> <!-- straight single quote, same operation as the double quote -->
+ <token>'</token>
+ </entry>
+ <entry operation="MOVE_LEFT"> <!-- full stop; NOTE(review): MOVE_LEFT presumably joins the token to the token before it, confirm -->
+ <token>.</token>
+ </entry>
+ <entry operation="MOVE_LEFT"> <!-- question mark -->
+ <token>?</token>
+ </entry>
+ <entry operation="MOVE_LEFT"> <!-- exclamation mark -->
+ <token>!</token>
+ </entry>
+ <entry operation="MOVE_LEFT"> <!-- comma -->
+ <token>,</token>
+ </entry>
+ <entry operation="MOVE_LEFT"> <!-- semicolon -->
+ <token>;</token>
+ </entry>
+ <entry operation="MOVE_LEFT"> <!-- colon -->
+ <token>:</token>
+ </entry>
+ <entry operation="MOVE_RIGHT"> <!-- opening parenthesis; NOTE(review): MOVE_RIGHT presumably joins the token to the token after it, confirm -->
+ <token>(</token>
+ </entry>
+ <entry operation="MOVE_LEFT"> <!-- closing parenthesis -->
+ <token>)</token>
+ </entry>
+ <entry operation="MOVE_LEFT"> <!-- closing curly brace -->
+ <token>}</token>
+ </entry>
+ <entry operation="MOVE_RIGHT"> <!-- opening curly brace -->
+ <token>{</token>
+ </entry>
+ <entry operation="MOVE_LEFT"> <!-- closing square bracket -->
+ <token>]</token>
+ </entry>
+ <entry operation="MOVE_RIGHT"> <!-- opening square bracket -->
+ <token>[</token>
+ </entry>
+ <entry operation="MOVE_LEFT"> <!-- right-pointing guillemet, handled like the closing brackets -->
+ <token>»</token>
+ </entry>
+ <entry operation="MOVE_RIGHT"> <!-- left-pointing guillemet, handled like the opening brackets -->
+ <token>«</token>
+ </entry>
+ <entry operation="MOVE_RIGHT"> <!-- two backticks; presumably the Penn Treebank-style opening quote token, verify against the tokenizer output -->
+ <token>``</token>
+ </entry>
+ <entry operation="MOVE_LEFT"> <!-- two straight single quotes; presumably the matching closing quote token, verify -->
+ <token>''</token>
+ </entry>
+ <entry operation="MOVE_LEFT"> <!-- percent sign -->
+ <token>%</token>
+ </entry>
+ <entry operation="MOVE_LEFT"> <!-- ".org" suffix; this and the next two entries look like domain-name endings -->
+ <token>.org</token>
+ </entry>
+ <entry operation="MOVE_LEFT"> <!-- ".com" suffix -->
+ <token>.com</token>
+ </entry>
+ <entry operation="MOVE_LEFT"> <!-- ".net" suffix -->
+ <token>.net</token>
+ </entry>
+ <entry operation="MOVE_RIGHT"> <!-- hash sign -->
+ <token>#</token>
+ </entry>
+</dictionary>
Propchange: incubator/opennlp/trunk/opennlp-tools/lang/pt/tokenizer/pt-detokenizer.xml
------------------------------------------------------------------------------
svn:mime-type = text/plain