You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2012/03/26 09:48:27 UTC

svn commit: r1305254 - /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/PorterStemmer.java

Author: joern
Date: Mon Mar 26 07:48:27 2012
New Revision: 1305254

URL: http://svn.apache.org/viewvc?rev=1305254&view=rev
Log:
OPENNLP-337 Fixed import, removed main method and removed dependency on Lucene's ArrayUtil.

Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/PorterStemmer.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/PorterStemmer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/PorterStemmer.java?rev=1305254&r1=1305253&r2=1305254&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/PorterStemmer.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/PorterStemmer.java Mon Mar 26 07:48:27 2012
@@ -1,12 +1,12 @@
-package org.apache.lucene.analysis.en;
+package opennlp.tools.stemmer;
 
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
+ * the License. You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -43,14 +43,6 @@ package org.apache.lucene.analysis.en;
 
 */
 
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.FileInputStream;
-
-import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_CHAR;
-import org.apache.lucene.util.ArrayUtil;
-
 /**
  *
  * Stemmer, implementing the Porter Stemming Algorithm
@@ -60,16 +52,15 @@ import org.apache.lucene.util.ArrayUtil;
  * by calling one of the various stem(something) methods.
  */
 
-class PorterStemmer
-{
+class PorterStemmer implements Stemmer {
   private char[] b;
   private int i,    /* offset into b */
     j, k, k0;
   private boolean dirty = false;
-  private static final int INITIAL_SIZE = 50;
-
+  private static final int INC = 50;
+  
   public PorterStemmer() {
-    b = new char[INITIAL_SIZE];
+    b = new char[INC];
     i = 0;
   }
 
@@ -85,8 +76,12 @@ class PorterStemmer
    * adding characters, you can call stem(void) to process the word.
    */
   public void add(char ch) {
-    if (b.length <= i) {
-      b = ArrayUtil.grow(b, i+1);
+    if (b.length == i) {
+      
+      char[] new_b = new char[i+INC];
+      for (int c = 0; c < i; c++) new_b[c] = b[c]; {
+        b = new_b;
+      }
     }
     b[i++] = ch;
   }
@@ -437,6 +432,14 @@ class PorterStemmer
       return s;
   }
 
+  /**
+   * Stem a word provided as a CharSequence.
+   * Returns the result as a CharSequence.
+   */
+  public CharSequence stem(CharSequence word) {
+    return stem(word.toString());
+  }
+  
   /** Stem a word contained in a char[].  Returns true if the stemming process
    * resulted in a word different from the input.  You can retrieve the
    * result with getResultLength()/getResultBuffer() or toString().
@@ -453,7 +456,7 @@ class PorterStemmer
   public boolean stem(char[] wordBuffer, int offset, int wordLen) {
     reset();
     if (b.length < wordLen) {
-      b = new char[ArrayUtil.oversize(wordLen, NUM_BYTES_CHAR)];
+      b = new char[wordLen - offset];
     }
     System.arraycopy(wordBuffer, offset, b, 0, wordLen);
     i = wordLen;
@@ -491,57 +494,5 @@ class PorterStemmer
     i = k+1;
     return dirty;
   }
-
-  /** Test program for demonstrating the Stemmer.  It reads a file and
-   * stems each word, writing the result to standard out.
-   * Usage: Stemmer file-name
-   */
-  public static void main(String[] args) {
-    PorterStemmer s = new PorterStemmer();
-
-    for (int i = 0; i < args.length; i++) {
-      try {
-        InputStream in = new FileInputStream(args[i]);
-        byte[] buffer = new byte[1024];
-        int bufferLen, offset, ch;
-
-        bufferLen = in.read(buffer);
-        offset = 0;
-        s.reset();
-
-        while(true) {
-          if (offset < bufferLen)
-            ch = buffer[offset++];
-          else {
-            bufferLen = in.read(buffer);
-            offset = 0;
-            if (bufferLen < 0)
-              ch = -1;
-            else
-              ch = buffer[offset++];
-          }
-
-          if (Character.isLetter((char) ch)) {
-            s.add(Character.toLowerCase((char) ch));
-          }
-          else {
-             s.stem();
-             System.out.print(s.toString());
-             s.reset();
-             if (ch < 0)
-               break;
-             else {
-               System.out.print((char) ch);
-             }
-           }
-        }
-
-        in.close();
-      }
-      catch (IOException e) {
-        System.out.println("error reading " + args[i]);
-      }
-    }
-  }
 }