You are viewing a plain text version of this content. The canonical link for it is here.
Posted to fop-commits@xmlgraphics.apache.org by gm...@apache.org on 2004/09/06 20:03:12 UTC

cvs commit: xml-fop/src/java/org/apache/fop/tools/anttasks SerializeHyphPattern.java

gmazza      2004/09/06 11:03:12

  Modified:    .        build.xml
               src/java/org/apache/fop/layoutmgr LineLayoutManager.java
               src/java/org/apache/fop/tools/anttasks
                        SerializeHyphPattern.java
  Added:       src/java/org/apache/fop/hyphenation ByteVector.java
                        CharVector.java Hyphen.java Hyphenation.java
                        HyphenationException.java HyphenationTree.java
                        Hyphenator.java PatternConsumer.java
                        PatternParser.java TernaryTree.java
  Removed:     src/java/org/apache/fop/layout/hyphenation ByteVector.java
                        CharVector.java Hyphen.java Hyphenation.java
                        HyphenationException.java HyphenationTree.java
                        Hyphenator.java PatternConsumer.java
                        PatternParser.java TernaryTree.java
  Log:
  Moved hyphenation package to org.apache.fop.hyphenation
  
  Revision  Changes    Path
  1.110     +0 -2      xml-fop/build.xml
  
  Index: build.xml
  ===================================================================
  RCS file: /home/cvs/xml-fop/build.xml,v
  retrieving revision 1.109
  retrieving revision 1.110
  diff -u -r1.109 -r1.110
  --- build.xml	14 Aug 2004 18:50:37 -0000	1.109
  +++ build.xml	6 Sep 2004 18:03:12 -0000	1.110
  @@ -792,8 +792,6 @@
         <group title="Layout">
           <package name="org.apache.fop.layoutmgr"/>
           <package name="org.apache.fop.layoutmgr.*"/>
  -        <package name="org.apache.fop.layout"/>
  -        <package name="org.apache.fop.layout.*"/>
         </group>
         <group title="Area Tree">
           <package name="org.apache.fop.area"/>
  
  
  
  1.1                  xml-fop/src/java/org/apache/fop/hyphenation/ByteVector.java
  
  Index: ByteVector.java
  ===================================================================
  /*
   * Copyright 1999-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  /* $Id: ByteVector.java,v 1.1 2004/09/06 18:03:12 gmazza Exp $ */
   
  package org.apache.fop.hyphenation;
  
  import java.io.Serializable;
  
  /**
   * This class implements a simple byte vector with access to the
   * underlying array.
   *
   * @author Carlos Villegas <ca...@uniscope.co.jp>
   */
  public class ByteVector implements Serializable {
  
      /**
       * Capacity increment size
       */
      private static final int DEFAULT_BLOCK_SIZE = 2048;
      private int blockSize;
  
      /**
       * The encapsulated array
       */
      private byte[] array;
  
      /**
       * Points to next free item
       */
      private int n;
  
      public ByteVector() {
          this(DEFAULT_BLOCK_SIZE);
      }
  
      public ByteVector(int capacity) {
          if (capacity > 0) {
              blockSize = capacity;
          } else {
              blockSize = DEFAULT_BLOCK_SIZE;
          }
          array = new byte[blockSize];
          n = 0;
      }
  
      public ByteVector(byte[] a) {
          blockSize = DEFAULT_BLOCK_SIZE;
          array = a;
          n = 0;
      }
  
      public ByteVector(byte[] a, int capacity) {
          if (capacity > 0) {
              blockSize = capacity;
          } else {
              blockSize = DEFAULT_BLOCK_SIZE;
          }
          array = a;
          n = 0;
      }
  
      public byte[] getArray() {
          return array;
      }
  
      /**
       * return number of items in array
       */
      public int length() {
          return n;
      }
  
      /**
       * returns current capacity of array
       */
      public int capacity() {
          return array.length;
      }
  
      public void put(int index, byte val) {
          array[index] = val;
      }
  
      public byte get(int index) {
          return array[index];
      }
  
      /**
       * This is to implement memory allocation in the array. Like malloc().
       */
      public int alloc(int size) {
          int index = n;
          int len = array.length;
          if (n + size >= len) {
              byte[] aux = new byte[len + blockSize];
              System.arraycopy(array, 0, aux, 0, len);
              array = aux;
          }
          n += size;
          return index;
      }
  
      public void trimToSize() {
          if (n < array.length) {
              byte[] aux = new byte[n];
              System.arraycopy(array, 0, aux, 0, n);
              array = aux;
          }
      }
  
  }
  
  
  
  1.1                  xml-fop/src/java/org/apache/fop/hyphenation/CharVector.java
  
  Index: CharVector.java
  ===================================================================
  /*
   * Copyright 1999-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  /* $Id: CharVector.java,v 1.1 2004/09/06 18:03:12 gmazza Exp $ */
   
  package org.apache.fop.hyphenation;
  
  import java.io.Serializable;
  
  /**
   * This class implements a simple char vector with access to the
   * underlying array.
   *
   * @author Carlos Villegas <ca...@uniscope.co.jp>
   */
  public class CharVector implements Cloneable, Serializable {
  
      /**
       * Capacity increment size
       */
      private static final int DEFAULT_BLOCK_SIZE = 2048;
      private int blockSize;
  
      /**
       * The encapsulated array
       */
      private char[] array;
  
      /**
       * Points to next free item
       */
      private int n;
  
      public CharVector() {
          this(DEFAULT_BLOCK_SIZE);
      }
  
      public CharVector(int capacity) {
          if (capacity > 0) {
              blockSize = capacity;
          } else {
              blockSize = DEFAULT_BLOCK_SIZE;
          }
          array = new char[blockSize];
          n = 0;
      }
  
      public CharVector(char[] a) {
          blockSize = DEFAULT_BLOCK_SIZE;
          array = a;
          n = a.length;
      }
  
      public CharVector(char[] a, int capacity) {
          if (capacity > 0) {
              blockSize = capacity;
          } else {
              blockSize = DEFAULT_BLOCK_SIZE;
          }
          array = a;
          n = a.length;
      }
  
      /**
       * Reset Vector but don't resize or clear elements
       */
      public void clear() {
          n = 0;
      }
  
      public Object clone() {
          CharVector cv = new CharVector((char[])array.clone(), blockSize);
          cv.n = this.n;
          return cv;
      }
  
      public char[] getArray() {
          return array;
      }
  
      /**
       * return number of items in array
       */
      public int length() {
          return n;
      }
  
      /**
       * returns current capacity of array
       */
      public int capacity() {
          return array.length;
      }
  
      public void put(int index, char val) {
          array[index] = val;
      }
  
      public char get(int index) {
          return array[index];
      }
  
      public int alloc(int size) {
          int index = n;
          int len = array.length;
          if (n + size >= len) {
              char[] aux = new char[len + blockSize];
              System.arraycopy(array, 0, aux, 0, len);
              array = aux;
          }
          n += size;
          return index;
      }
  
      public void trimToSize() {
          if (n < array.length) {
              char[] aux = new char[n];
              System.arraycopy(array, 0, aux, 0, n);
              array = aux;
          }
      }
  
  }
  
  
  
  1.1                  xml-fop/src/java/org/apache/fop/hyphenation/Hyphen.java
  
  Index: Hyphen.java
  ===================================================================
  /*
   * Copyright 1999-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  /* $Id: Hyphen.java,v 1.1 2004/09/06 18:03:12 gmazza Exp $ */
   
  package org.apache.fop.hyphenation;
  
  import java.io.Serializable;
  
  /**
   * This class represents a hyphen. A 'full' hyphen is made of 3 parts:
   * the pre-break text, post-break text and no-break. If no line-break
   * is generated at this position, the no-break text is used, otherwise,
   * pre-break and post-break are used. Typically, pre-break is equal to
   * the hyphen character and the others are empty. However, this general
   * scheme allows support for cases in some languages where words change
   * spelling if they're split across lines, like german's 'backen' which
   * hyphenates 'bak-ken'. BTW, this comes from TeX.
   *
   * @author Carlos Villegas <ca...@uniscope.co.jp>
   */
  
  public class Hyphen implements Serializable {
      public String preBreak;
      public String noBreak;
      public String postBreak;
  
      Hyphen(String pre, String no, String post) {
          preBreak = pre;
          noBreak = no;
          postBreak = post;
      }
  
      Hyphen(String pre) {
          preBreak = pre;
          noBreak = null;
          postBreak = null;
      }
  
      public String toString() {
          if (noBreak == null 
                  && postBreak == null 
                  && preBreak != null
                  && preBreak.equals("-")) {
              return "-";
                  }
          StringBuffer res = new StringBuffer("{");
          res.append(preBreak);
          res.append("}{");
          res.append(postBreak);
          res.append("}{");
          res.append(noBreak);
          res.append('}');
          return res.toString();
      }
  
  }
  
  
  
  1.1                  xml-fop/src/java/org/apache/fop/hyphenation/Hyphenation.java
  
  Index: Hyphenation.java
  ===================================================================
  /*
   * Copyright 1999-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  /* $Id: Hyphenation.java,v 1.1 2004/09/06 18:03:12 gmazza Exp $ */
   
  package org.apache.fop.hyphenation;
  
  /**
   * This class represents a hyphenated word.
   *
   * @author Carlos Villegas <ca...@uniscope.co.jp>
   */
  public class Hyphenation {
      
      private int[] hyphenPoints;
      private String word;
  
      /**
       * number of hyphenation points in word
       */
      private int len;
  
      /**
       * rawWord as made of alternating strings and {@link Hyphen Hyphen}
       * instances
       */
      Hyphenation(String word, int[] points) {
          this.word = word;
          hyphenPoints = points;
          len = points.length;
      }
  
      /**
       * @return the number of hyphenation points in the word
       */
      public int length() {
          return len;
      }
  
      /**
       * @return the pre-break text, not including the hyphen character
       */
      public String getPreHyphenText(int index) {
          return word.substring(0, hyphenPoints[index]);
      }
  
      /**
       * @return the post-break text
       */
      public String getPostHyphenText(int index) {
          return word.substring(hyphenPoints[index]);
      }
  
      /**
       * @return the hyphenation points
       */
      public int[] getHyphenationPoints() {
          return hyphenPoints;
      }
  
      public String toString() {
          StringBuffer str = new StringBuffer();
          int start = 0;
          for (int i = 0; i < len; i++) {
              str.append(word.substring(start, hyphenPoints[i]) + "-");
              start = hyphenPoints[i];
          }
          str.append(word.substring(start));
          return str.toString();
      }
  
  }
  
  
  
  1.1                  xml-fop/src/java/org/apache/fop/hyphenation/HyphenationException.java
  
  Index: HyphenationException.java
  ===================================================================
  /*
   * Copyright 1999-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  /* $Id: HyphenationException.java,v 1.1 2004/09/06 18:03:12 gmazza Exp $ */
  
  package org.apache.fop.hyphenation;
  
  /**
   * @author Carlos Villegas <ca...@uniscope.co.jp>
   * (todo) Derive from FOPException
   */
  public class HyphenationException extends Exception {
  
      /**
       * @see java.lang.Throwable#Throwable(String)
       */
      public HyphenationException(String msg) {
          super(msg);
      }
  
  }
  
  
  
  1.1                  xml-fop/src/java/org/apache/fop/hyphenation/HyphenationTree.java
  
  Index: HyphenationTree.java
  ===================================================================
  /*
   * Copyright 1999-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  /* $Id: HyphenationTree.java,v 1.1 2004/09/06 18:03:12 gmazza Exp $ */
   
  package org.apache.fop.hyphenation;
  
  import java.io.BufferedReader;
  import java.io.FileInputStream;
  import java.io.FileOutputStream;
  import java.io.FileReader;
  import java.io.IOException;
  import java.io.ObjectInputStream;
  import java.io.ObjectOutputStream;
  import java.io.Serializable;
  import java.util.ArrayList;
  import java.util.HashMap;
  
  /**
   * This tree structure stores the hyphenation patterns in an efficient
   * way for fast lookup. It provides the provides the method to
   * hyphenate a word.
   *
   * @author Carlos Villegas <ca...@uniscope.co.jp>
   */
  public class HyphenationTree extends TernaryTree 
              implements PatternConsumer, Serializable {
  
      /**
       * value space: stores the inteletter values
       */
      protected ByteVector vspace;
  
      /**
       * This map stores hyphenation exceptions
       */
      protected HashMap stoplist;
  
      /**
       * This map stores the character classes
       */
      protected TernaryTree classmap;
  
      /**
       * Temporary map to store interletter values on pattern loading.
       */
      private transient TernaryTree ivalues;
  
      public HyphenationTree() {
          stoplist = new HashMap(23);    // usually a small table
          classmap = new TernaryTree();
          vspace = new ByteVector();
          vspace.alloc(1);    // this reserves index 0, which we don't use
      }
  
      /**
       * Packs the values by storing them in 4 bits, two values into a byte
       * Values range is from 0 to 9. We use zero as terminator,
       * so we'll add 1 to the value.
       * @param values a string of digits from '0' to '9' representing the
       * interletter values.
       * @return the index into the vspace array where the packed values
       * are stored.
       */
      protected int packValues(String values) {
          int i, n = values.length();
          int m = (n & 1) == 1 ? (n >> 1) + 2 : (n >> 1) + 1;
          int offset = vspace.alloc(m);
          byte[] va = vspace.getArray();
          for (i = 0; i < n; i++) {
              int j = i >> 1;
              byte v = (byte)((values.charAt(i) - '0' + 1) & 0x0f);
              if ((i & 1) == 1) {
                  va[j + offset] = (byte)(va[j + offset] | v);
              } else {
                  va[j + offset] = (byte)(v << 4);    // big endian
              }
          }
          va[m - 1 + offset] = 0;    // terminator
          return offset;
      }
  
      protected String unpackValues(int k) {
          StringBuffer buf = new StringBuffer();
          byte v = vspace.get(k++);
          while (v != 0) {
              char c = (char)((v >>> 4) - 1 + '0');
              buf.append(c);
              c = (char)(v & 0x0f);
              if (c == 0) {
                  break;
              }
              c = (char)(c - 1 + '0');
              buf.append(c);
              v = vspace.get(k++);
          }
          return buf.toString();
      }
  
      /**
       * Read hyphenation patterns from an XML file.
       */
      public void loadPatterns(String filename) throws HyphenationException {
          PatternParser pp = new PatternParser(this);
          ivalues = new TernaryTree();
  
          pp.parse(filename);
  
          // patterns/values should be now in the tree
          // let's optimize a bit
          trimToSize();
          vspace.trimToSize();
          classmap.trimToSize();
  
          // get rid of the auxiliary map
          ivalues = null;
      }
  
      public String findPattern(String pat) {
          int k = super.find(pat);
          if (k >= 0) {
              return unpackValues(k);
          }
          return "";
      }
  
      /**
       * String compare, returns 0 if equal or
       * t is a substring of s
       */
      protected int hstrcmp(char[] s, int si, char[] t, int ti) {
          for (; s[si] == t[ti]; si++, ti++) {
              if (s[si] == 0) {
                  return 0;
              }
          }
          if (t[ti] == 0) {
              return 0;
          }
          return s[si] - t[ti];
      }
  
      protected byte[] getValues(int k) {
          StringBuffer buf = new StringBuffer();
          byte v = vspace.get(k++);
          while (v != 0) {
              char c = (char)((v >>> 4) - 1);
              buf.append(c);
              c = (char)(v & 0x0f);
              if (c == 0) {
                  break;
              }
              c = (char)(c - 1);
              buf.append(c);
              v = vspace.get(k++);
          }
          byte[] res = new byte[buf.length()];
          for (int i = 0; i < res.length; i++) {
              res[i] = (byte)buf.charAt(i);
          }
          return res;
      }
  
      /**
       * <p>Search for all possible partial matches of word starting
       * at index an update interletter values. In other words, it
       * does something like:</p>
       * <code>
       * for(i=0; i<patterns.length; i++) {
       * if ( word.substring(index).startsWidth(patterns[i]) )
       * update_interletter_values(patterns[i]);
       * }
       * </code>
       * <p>But it is done in an efficient way since the patterns are
       * stored in a ternary tree. In fact, this is the whole purpose
       * of having the tree: doing this search without having to test
       * every single pattern. The number of patterns for languages
       * such as English range from 4000 to 10000. Thus, doing thousands
       * of string comparisons for each word to hyphenate would be
       * really slow without the tree. The tradeoff is memory, but
       * using a ternary tree instead of a trie, almost halves the
       * the memory used by Lout or TeX. It's also faster than using
       * a hash table</p>
       * @param word null terminated word to match
       * @param index start index from word
       * @param il interletter values array to update
       */
      protected void searchPatterns(char[] word, int index, byte[] il) {
          byte[] values;
          int i = index;
          char p, q;
          char sp = word[i];
          p = root;
  
          while (p > 0 && p < sc.length) {
              if (sc[p] == 0xFFFF) {
                  if (hstrcmp(word, i, kv.getArray(), lo[p]) == 0) {
                      values = getValues(eq[p]);    // data pointer is in eq[]
                      int j = index;
                      for (int k = 0; k < values.length; k++) {
                          if (j < il.length && values[k] > il[j]) {
                              il[j] = values[k];
                          }
                          j++;
                      }
                  }
                  return;
              }
              int d = sp - sc[p];
              if (d == 0) {
                  if (sp == 0) {
                      break;
                  }
                  sp = word[++i];
                  p = eq[p];
                  q = p;
  
                  // look for a pattern ending at this position by searching for
                  // the null char ( splitchar == 0 )
                  while (q > 0 && q < sc.length) {
                      if (sc[q] == 0xFFFF) {        // stop at compressed branch
                          break;
                      }
                      if (sc[q] == 0) {
                          values = getValues(eq[q]);
                          int j = index;
                          for (int k = 0; k < values.length; k++) {
                              if (j < il.length && values[k] > il[j]) {
                                  il[j] = values[k];
                              }
                              j++;
                          }
                          break;
                      } else {
                          q = lo[q];
  
                          /**
                           * actually the code should be:
                           * q = sc[q] < 0 ? hi[q] : lo[q];
                           * but java chars are unsigned
                           */
                      }
                  }
              } else {
                  p = d < 0 ? lo[p] : hi[p];
              }
          }
      }
  
      /**
       * Hyphenate word and return a Hyphenation object.
       * @param word the word to be hyphenated
       * @param remainCharCount Minimum number of characters allowed
       * before the hyphenation point.
       * @param pushCharCount Minimum number of characters allowed after
       * the hyphenation point.
       * @return a {@link Hyphenation Hyphenation} object representing
       * the hyphenated word or null if word is not hyphenated.
       */
      public Hyphenation hyphenate(String word, int remainCharCount,
                                   int pushCharCount) {
          char[] w = word.toCharArray();
          return hyphenate(w, 0, w.length, remainCharCount, pushCharCount);
      }
  
      /**
       * w = "****nnllllllnnn*****",
       * where n is a non-letter, l is a letter,
       * all n may be absent, the first n is at offset,
       * the first l is at offset + iIgnoreAtBeginning;
       * word = ".llllll.'\0'***",
       * where all l in w are copied into word.
       * In the first part of the routine len = w.length,
       * in the second part of the routine len = word.length.
       * Three indices are used:
       * index(w), the index in w,
       * index(word), the index in word,
       * letterindex(word), the index in the letter part of word.
       * The following relations exist:
       * index(w) = offset + i - 1
       * index(word) = i - iIgnoreAtBeginning
       * letterindex(word) = index(word) - 1
       * (see first loop).
       * It follows that:
       * index(w) - index(word) = offset - 1 + iIgnoreAtBeginning
       * index(w) = letterindex(word) + offset + iIgnoreAtBeginning
       */
  
      /**
       * Hyphenate word and return an array of hyphenation points.
       * @param w char array that contains the word
       * @param offset Offset to first character in word
       * @param len Length of word
       * @param remainCharCount Minimum number of characters allowed
       * before the hyphenation point.
       * @param pushCharCount Minimum number of characters allowed after
       * the hyphenation point.
       * @return a {@link Hyphenation Hyphenation} object representing
       * the hyphenated word or null if word is not hyphenated.
       */
      public Hyphenation hyphenate(char[] w, int offset, int len,
                                   int remainCharCount, int pushCharCount) {
          int i;
          char[] word = new char[len + 3];
  
          // normalize word
          char[] c = new char[2];
          int iIgnoreAtBeginning = 0;
          int iLength = len;
          boolean bEndOfLetters = false;
          for (i = 1; i <= len; i++) {
              c[0] = w[offset + i - 1];
              int nc = classmap.find(c, 0);
              if (nc < 0) {    // found a non-letter character ...
                  if (i == (1 + iIgnoreAtBeginning)) {
                      // ... before any letter character
                      iIgnoreAtBeginning ++;
                  } else {
                      // ... after a letter character
                      bEndOfLetters = true;
                  }
                  iLength --;
              } else {
                  if (!bEndOfLetters) {
                      word[i - iIgnoreAtBeginning] = (char)nc;
                  } else {
                      return null;
                  }
              }
          }
          len = iLength;
          if (len < (remainCharCount + pushCharCount)) {
              // word is too short to be hyphenated
              return null;
          }
          int[] result = new int[len + 1];
          int k = 0;
  
          // check exception list first
          String sw = new String(word, 1, len);
          if (stoplist.containsKey(sw)) {
              // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no = null)
              ArrayList hw = (ArrayList)stoplist.get(sw);
              int j = 0;
              for (i = 0; i < hw.size(); i++) {
                  Object o = hw.get(i);
                  // j = index(sw) = letterindex(word)?
                  // result[k] = corresponding index(w)
                  if (o instanceof String) {
                      j += ((String)o).length();
                      if (j >= remainCharCount && j < (len - pushCharCount)) {
                          result[k++] = j + iIgnoreAtBeginning;
                      }
                  }
              }
          } else {
              // use algorithm to get hyphenation points
              word[0] = '.';                    // word start marker
              word[len + 1] = '.';              // word end marker
              word[len + 2] = 0;                // null terminated
              byte[] il = new byte[len + 3];    // initialized to zero
              for (i = 0; i < len + 1; i++) {
                  searchPatterns(word, i, il);
              }
  
              // hyphenation points are located where interletter value is odd
              // i is letterindex(word),
              // i + 1 is index(word),
              // result[k] = corresponding index(w)
              for (i = 0; i < len; i++) {
                  if (((il[i + 1] & 1) == 1) && i >= remainCharCount
                          && i <= (len - pushCharCount)) {
                      result[k++] = i + iIgnoreAtBeginning;
                  }
              }
          }
  
  
          if (k > 0) {
              // trim result array
              int[] res = new int[k];
              System.arraycopy(result, 0, res, 0, k);
              return new Hyphenation(new String(w, offset, len), res);
          } else {
              return null;
          }
      }
  
      /**
       * Add a character class to the tree. It is used by
       * {@link PatternParser PatternParser} as callback to
       * add character classes. Character classes define the
       * valid word characters for hyphenation. If a word contains
       * a character not defined in any of the classes, it is not hyphenated.
       * It also defines a way to normalize the characters in order
       * to compare them with the stored patterns. Usually pattern
       * files use only lower case characters, in this case a class
       * for letter 'a', for example, should be defined as "aA", the first
       * character being the normalization char.
       */
      public void addClass(String chargroup) {
          if (chargroup.length() > 0) {
              char equivChar = chargroup.charAt(0);
              char[] key = new char[2];
              key[1] = 0;
              for (int i = 0; i < chargroup.length(); i++) {
                  key[0] = chargroup.charAt(i);
                  classmap.insert(key, 0, equivChar);
              }
          }
      }
  
      /**
       * Add an exception to the tree. It is used by
       * {@link PatternParser PatternParser} class as callback to
       * store the hyphenation exceptions.
       * @param word normalized word
       * @param hyphenatedword a vector of alternating strings and
       * {@link Hyphen hyphen} objects.
       */
      public void addException(String word, ArrayList hyphenatedword) {
          stoplist.put(word, hyphenatedword);
      }
  
      /**
       * Add a pattern to the tree. Mainly, to be used by
       * {@link PatternParser PatternParser} class as callback to
       * add a pattern to the tree.
       * @param pattern the hyphenation pattern
       * @param ivalue interletter weight values indicating the
       * desirability and priority of hyphenating at a given point
       * within the pattern. It should contain only digit characters.
       * (i.e. '0' to '9').
       */
      public void addPattern(String pattern, String ivalue) {
          int k = ivalues.find(ivalue);
          if (k <= 0) {
              k = packValues(ivalue);
              ivalues.insert(ivalue, (char)k);
          }
          insert(pattern, (char)k);
      }
  
      public void printStats() {
          System.out.println("Value space size = "
                             + Integer.toString(vspace.length()));
          super.printStats();
  
      }
  
      public static void main(String[] argv) throws Exception {
          HyphenationTree ht = null;
          int minCharCount = 2;
          BufferedReader in =
              new BufferedReader(new java.io.InputStreamReader(System.in));
          while (true) {
              System.out.print("l:\tload patterns from XML\n"
                      + "L:\tload patterns from serialized object\n"
                      + "s:\tset minimun character count\n"
                      + "w:\twrite hyphenation tree to object file\n"
                      + "h:\thyphenate\n"
                      + "f:\tfind pattern\n"
                      + "b:\tbenchmark\n"
                      + "q:\tquit\n\n"
                      + "Command:");
              String token = in.readLine().trim();
              if (token.equals("f")) {
                  System.out.print("Pattern: ");
                  token = in.readLine().trim();
                  System.out.println("Values: " + ht.findPattern(token));
              } else if (token.equals("s")) {
                  System.out.print("Minimun value: ");
                  token = in.readLine().trim();
                  minCharCount = Integer.parseInt(token);
              } else if (token.equals("l")) {
                  ht = new HyphenationTree();
                  System.out.print("XML file name: ");
                  token = in.readLine().trim();
                  ht.loadPatterns(token);
              } else if (token.equals("L")) {
                  ObjectInputStream ois = null;
                  System.out.print("Object file name: ");
                  token = in.readLine().trim();
                  try {
                      ois = new ObjectInputStream(new FileInputStream(token));
                      ht = (HyphenationTree)ois.readObject();
                  } catch (Exception e) {
                      e.printStackTrace();
                  } finally {
                      if (ois != null) {
                          try {
                              ois.close();
                          } catch (IOException e) {
                              //ignore
                          }
                      }
                  }
              } else if (token.equals("w")) {
                  System.out.print("Object file name: ");
                  token = in.readLine().trim();
                  ObjectOutputStream oos = null;
                  try {
                      oos = new ObjectOutputStream(new FileOutputStream(token));
                      oos.writeObject(ht);
                  } catch (Exception e) {
                      e.printStackTrace();
                  } finally {
                      if (oos != null) {
                          try {
                              oos.flush();
                          } catch (IOException e) {
                              //ignore
                          }
                          try {
                              oos.close();
                          } catch (IOException e) {
                              //ignore
                          }
                      }
                  }
              } else if (token.equals("h")) {
                  System.out.print("Word: ");
                  token = in.readLine().trim();
                  System.out.print("Hyphenation points: ");
                  System.out.println(ht.hyphenate(token, minCharCount,
                                                  minCharCount));
              } else if (token.equals("b")) {
                  if (ht == null) {
                      System.out.println("No patterns has been loaded.");
                      break;
                  }
                  System.out.print("Word list filename: ");
                  token = in.readLine().trim();
                  long starttime = 0;
                  int counter = 0;
                  try {
                      BufferedReader reader =
                          new BufferedReader(new FileReader(token));
                      String line;
  
                      starttime = System.currentTimeMillis();
                      while ((line = reader.readLine()) != null) {
                          // System.out.print("\nline: ");
                          Hyphenation hyp = ht.hyphenate(line, minCharCount,
                                                         minCharCount);
                          if (hyp != null) {
                              String hword = hyp.toString();
                              // System.out.println(line);
                              // System.out.println(hword);
                          } else {
                              // System.out.println("No hyphenation");
                          }
                          counter++;
                      }
                  } catch (Exception ioe) {
                      System.out.println("Exception " + ioe);
                      ioe.printStackTrace();
                  }
                  long endtime = System.currentTimeMillis();
                  long result = endtime - starttime;
                  System.out.println(counter + " words in " + result
                                     + " Millisekunden hyphenated");
  
              } else if (token.equals("q")) {
                  break;
              }
          }
  
      }
  
  }
  
  
  
  1.1                  xml-fop/src/java/org/apache/fop/hyphenation/Hyphenator.java
  
  Index: Hyphenator.java
  ===================================================================
  /*
   * Copyright 1999-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  /* $Id: Hyphenator.java,v 1.1 2004/09/06 18:03:12 gmazza Exp $ */
   
  package org.apache.fop.hyphenation;
  
  import java.io.BufferedInputStream;
  import java.io.File;
  import java.io.FileInputStream;
  import java.io.IOException;
  import java.io.InputStream;
  import java.io.ObjectInputStream;
  import java.util.Hashtable;
  
  /**
   * This class is the main entry point to the hyphenation package.
   * You can use only the static methods or create an instance.
   *
   * @author Carlos Villegas <ca...@uniscope.co.jp>
   */
  public class Hyphenator {
      
      /**@todo Don't use statics */
      private static Hashtable hyphenTrees = new Hashtable();
  
      private HyphenationTree hyphenTree = null;
      private int remainCharCount = 2;
      private int pushCharCount = 2;
      private static boolean errorDump = false;
  
      public Hyphenator(String lang, String country, int leftMin,
                        int rightMin) {
          hyphenTree = getHyphenationTree(lang, country);
          remainCharCount = leftMin;
          pushCharCount = rightMin;
      }
  
      public static HyphenationTree getHyphenationTree(String lang,
              String country) {
          String key = lang;
          // check whether the country code has been used
          if (country != null && !country.equals("none")) {
              key += "_" + country;
          }
          // first try to find it in the cache
          if (hyphenTrees.containsKey(key)) {
              return (HyphenationTree)hyphenTrees.get(key);
          }
          if (hyphenTrees.containsKey(lang)) {
              return (HyphenationTree)hyphenTrees.get(lang);
          }
  
          HyphenationTree hTree = getFopHyphenationTree(key);
          if (hTree == null) {
              String hyphenDir = "/hyph";
              if (hyphenDir != null) {
                  hTree = getUserHyphenationTree(key, hyphenDir);
              }
          }
          // put it into the pattern cache
          if (hTree != null) {
              hyphenTrees.put(key, hTree);
          } else {
              /**@todo Proper logging please */
              //log.error("Couldn't find hyphenation pattern "
              //                       + key);
          }
          return hTree;
      }
  
      private static InputStream getResourceStream(String key) {
          InputStream is = null;
          // Try to use Context Class Loader to load the properties file.
          try {
              java.lang.reflect.Method getCCL =
                  Thread.class.getMethod("getContextClassLoader", new Class[0]);
              if (getCCL != null) {
                  ClassLoader contextClassLoader =
                      (ClassLoader)getCCL.invoke(Thread.currentThread(),
                                                 new Object[0]);
                  is = contextClassLoader.getResourceAsStream("hyph/" + key
                                                              + ".hyp");
              }
          } catch (Exception e) {
              //ignore, fallback further down
          }
  
          if (is == null) {
              is = Hyphenator.class.getResourceAsStream("/hyph/" + key
                                                        + ".hyp");
          }
  
          return is;
      }
  
      public static HyphenationTree getFopHyphenationTree(String key) {
          HyphenationTree hTree = null;
          ObjectInputStream ois = null;
          InputStream is = null;
          try {
              is = getResourceStream(key);
              if (is == null) {
                  if (key.length() == 5) {
                      is = getResourceStream(key.substring(0, 2));
                      if (is != null) {
                          //log.error("Couldn't find hyphenation pattern  "
                          //                       + key
                          //                       + "\nusing general language pattern "
                          //                       + key.substring(0, 2)
                          //                       + " instead.");
                      } else {
                          if (errorDump) {
                              //log.error("Couldn't find precompiled "
                              //                       + "fop hyphenation pattern "
                              //                       + key + ".hyp");
                          }
                          return null;
                      }
                  } else {
                      if (errorDump) {
                          //log.error("Couldn't find precompiled "
                          //                       + "fop hyphenation pattern "
                          //                       + key + ".hyp");
                      }
                      return null;
                  }
              }
              ois = new ObjectInputStream(is);
              hTree = (HyphenationTree)ois.readObject();
          } catch (Exception e) {
              /**@todo proper logging please */
              e.printStackTrace();
          } finally {
              if (ois != null) {
                  try {
                      ois.close();
                  } catch (IOException e) {
                      //log.error("can't close hyphenation object stream");
                  }
              }
          }
          return hTree;
      }
  
      /**
       * load tree from serialized file or xml file
       * using configuration settings
       */
      public static HyphenationTree getUserHyphenationTree(String key,
              String hyphenDir) {
          HyphenationTree hTree = null;
          // I use here the following convention. The file name specified in
          // the configuration is taken as the base name. First we try
          // name + ".hyp" assuming a serialized HyphenationTree. If that fails
          // we try name + ".xml", assumming a raw hyphenation pattern file.
  
          // first try serialized object
          File hyphenFile = new File(hyphenDir, key + ".hyp");
          if (hyphenFile.exists()) {
              ObjectInputStream ois = null;
              try {
                  ois = new ObjectInputStream(new BufferedInputStream(
                              new FileInputStream(hyphenFile)));
                  hTree = (HyphenationTree)ois.readObject();
              } catch (Exception e) {
                  /**@todo Proper logging please */
                  e.printStackTrace();
              } finally {
                  if (ois != null) {
                      try {
                          ois.close();
                      } catch (IOException e) {
                          //ignore
                      }
                  }
              }
              return hTree;
          } else {
  
              // try the raw XML file
              hyphenFile = new File(hyphenDir, key + ".xml");
              if (hyphenFile.exists()) {
                  hTree = new HyphenationTree();
                  if (errorDump) {
                      //log.error("reading " + hyphenDir + key
                      //                       + ".xml");
                  }
                  try {
                      hTree.loadPatterns(hyphenFile.getPath());
                      if (errorDump) {
                          System.out.println("Stats: ");
                          hTree.printStats();
                      }
                      return hTree;
                  } catch (HyphenationException ex) {
                      if (errorDump) {
                          //log.error("Can't load user patterns "
                          //                       + "from xml file " + hyphenDir
                          //                       + key + ".xml");
                      }
                      return null;
                  }
              } else {
                  if (errorDump) {
                      //log.error("Tried to load "
                      //                       + hyphenFile.toString()
                      //                       + "\nCannot find compiled nor xml file for "
                      //                       + "hyphenation pattern" + key);
                  }
                  return null;
              }
          }
      }
  
      public static Hyphenation hyphenate(String lang, String country,
                                          String word, int leftMin,
                                          int rightMin) {
          HyphenationTree hTree = getHyphenationTree(lang, country);
          if (hTree == null) {
              //log.error("Error building hyphenation tree for language "
              //                       + lang);
              return null;
          }
          return hTree.hyphenate(word, leftMin, rightMin);
      }
  
      public static Hyphenation hyphenate(String lang, String country,
                                          char[] word, int offset, int len,
                                          int leftMin, int rightMin) {
          HyphenationTree hTree = getHyphenationTree(lang, country);
          if (hTree == null) {
              //log.error("Error building hyphenation tree for language "
              //                       + lang);
              return null;
          }
          return hTree.hyphenate(word, offset, len, leftMin, rightMin);
      }
  
      public void setMinRemainCharCount(int min) {
          remainCharCount = min;
      }
  
      public void setMinPushCharCount(int min) {
          pushCharCount = min;
      }
  
      public void setLanguage(String lang, String country) {
          hyphenTree = getHyphenationTree(lang, country);
      }
  
      public Hyphenation hyphenate(char[] word, int offset, int len) {
          if (hyphenTree == null) {
              return null;
          }
          return hyphenTree.hyphenate(word, offset, len, remainCharCount,
                                      pushCharCount);
      }
  
      public Hyphenation hyphenate(String word) {
          if (hyphenTree == null) {
              return null;
          }
          return hyphenTree.hyphenate(word, remainCharCount, pushCharCount);
      }
  
  }
  
  
  
  1.1                  xml-fop/src/java/org/apache/fop/hyphenation/PatternConsumer.java
  
  Index: PatternConsumer.java
  ===================================================================
  /*
   * Copyright 1999-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  /* $Id: PatternConsumer.java,v 1.1 2004/09/06 18:03:12 gmazza Exp $ */
   
  package org.apache.fop.hyphenation;
  
  import java.util.ArrayList;
  
  /**
   * This interface is used to connect the XML pattern file parser to
   * the hyphenation tree.
   *
   * @author Carlos Villegas <ca...@uniscope.co.jp>
   */
  public interface PatternConsumer {
  
      /**
       * Add a character class.
       * A character class defines characters that are considered
       * equivalent for the purpose of hyphenation (e.g. "aA"). It
       * usually means to ignore case.
       * @param chargroup character group
       */
      void addClass(String chargroup);
  
      /**
       * Add a hyphenation exception. An exception replaces the
       * result obtained by the algorithm for cases for which this
       * fails or the user wants to provide his own hyphenation.
       * A hyphenatedword is a vector of alternating String's and
       * {@link Hyphen Hyphen} instances
       */
      void addException(String word, ArrayList hyphenatedword);
  
      /**
       * Add hyphenation patterns.
       * @param pattern the pattern
       * @param values interletter values expressed as a string of
       * digit characters.
       */
      void addPattern(String pattern, String values);
  
  }
  
  
  
  1.1                  xml-fop/src/java/org/apache/fop/hyphenation/PatternParser.java
  
  Index: PatternParser.java
  ===================================================================
  /*
   * Copyright 1999-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  /* $Id: PatternParser.java,v 1.1 2004/09/06 18:03:12 gmazza Exp $ */
   
  package org.apache.fop.hyphenation;
  
  // SAX
  import org.xml.sax.XMLReader;
  import org.xml.sax.InputSource;
  import org.xml.sax.SAXException;
  import org.xml.sax.SAXParseException;
  import org.xml.sax.helpers.DefaultHandler;
  import org.xml.sax.Attributes;
  
  // Java
  import java.io.File;
  import java.io.IOException;
  import java.util.ArrayList;
  import java.net.URL;
  
  /**
   * A SAX document handler to read and parse hyphenation patterns
   * from a XML file.
   *
   * @author Carlos Villegas <ca...@uniscope.co.jp>
   */
  public class PatternParser extends DefaultHandler implements PatternConsumer {
  
      XMLReader parser;
      int currElement;
      PatternConsumer consumer;
      StringBuffer token;
      ArrayList exception;
      char hyphenChar;
      String errMsg;
  
      static final int ELEM_CLASSES = 1;
      static final int ELEM_EXCEPTIONS = 2;
      static final int ELEM_PATTERNS = 3;
      static final int ELEM_HYPHEN = 4;
  
      public PatternParser() throws HyphenationException {
          token = new StringBuffer();
          parser = createParser();
          parser.setContentHandler(this);
          parser.setErrorHandler(this);
          hyphenChar = '-';    // default
  
      }
  
      public PatternParser(PatternConsumer consumer)
              throws HyphenationException {
          this();
          this.consumer = consumer;
      }
  
      public void setConsumer(PatternConsumer consumer) {
          this.consumer = consumer;
      }
  
      public void parse(String filename) throws HyphenationException {
          InputSource uri = fileInputSource(filename);
  
          try {
              parser.parse(uri);
          } catch (SAXException e) {
              throw new HyphenationException(errMsg);
          } catch (IOException e) {
              throw new HyphenationException(e.getMessage());
          } catch (NullPointerException e) {
              throw new HyphenationException("SAX parser not available");
          }
      }
  
      /**
       * creates a SAX parser, using the value of org.xml.sax.parser
       * defaulting to org.apache.xerces.parsers.SAXParser
       *
       * @return the created SAX parser
       */
      static XMLReader createParser() throws HyphenationException {
          String parserClassName = System.getProperty("org.xml.sax.parser");
          if (parserClassName == null) {
              parserClassName = "org.apache.xerces.parsers.SAXParser";
          }
          // System.out.println("using SAX parser " + parserClassName);
  
          try {
              return (XMLReader)Class.forName(parserClassName).newInstance();
          } catch (ClassNotFoundException e) {
              throw new HyphenationException("Could not find "
                                             + parserClassName);
          } catch (InstantiationException e) {
              throw new HyphenationException("Could not instantiate "
                                             + parserClassName);
          } catch (IllegalAccessException e) {
              throw new HyphenationException("Could not access "
                                             + parserClassName);
          } catch (ClassCastException e) {
              throw new HyphenationException(parserClassName
                                             + " is not a SAX driver");
          }
      }
  
      /**
       * create an InputSource from a file name
       *
       * @param filename the name of the file
       * @return the InputSource created
       */
      protected static InputSource fileInputSource(String filename)
              throws HyphenationException {
  
          /* this code adapted from James Clark's in XT */
          File file = new File(filename);
          String path = file.getAbsolutePath();
          String fSep = System.getProperty("file.separator");
          if (fSep != null && fSep.length() == 1) {
              path = path.replace(fSep.charAt(0), '/');
          }
          if (path.length() > 0 && path.charAt(0) != '/') {
              path = '/' + path;
          }
          try {
              return new InputSource(new URL("file", null, path).toString());
          } catch (java.net.MalformedURLException e) {
              throw new HyphenationException("unexpected MalformedURLException");
          }
      }
  
      protected String readToken(StringBuffer chars) {
          String word;
          boolean space = false;
          int i;
          for (i = 0; i < chars.length(); i++) {
              if (Character.isWhitespace(chars.charAt(i))) {
                  space = true;
              } else {
                  break;
              }
          }
          if (space) {
              // chars.delete(0,i);
              for (int countr = i; countr < chars.length(); countr++) {
                  chars.setCharAt(countr - i, chars.charAt(countr));
              }
              chars.setLength(chars.length() - i);
              if (token.length() > 0) {
                  word = token.toString();
                  token.setLength(0);
                  return word;
              }
          }
          space = false;
          for (i = 0; i < chars.length(); i++) {
              if (Character.isWhitespace(chars.charAt(i))) {
                  space = true;
                  break;
              }
          }
          token.append(chars.toString().substring(0, i));
          // chars.delete(0,i);
          for (int countr = i; countr < chars.length(); countr++) {
              chars.setCharAt(countr - i, chars.charAt(countr));
          }
          chars.setLength(chars.length() - i);
          if (space) {
              word = token.toString();
              token.setLength(0);
              return word;
          }
          token.append(chars);
          return null;
      }
  
      protected static String getPattern(String word) {
          StringBuffer pat = new StringBuffer();
          int len = word.length();
          for (int i = 0; i < len; i++) {
              if (!Character.isDigit(word.charAt(i))) {
                  pat.append(word.charAt(i));
              }
          }
          return pat.toString();
      }
  
      protected ArrayList normalizeException(ArrayList ex) {
          ArrayList res = new ArrayList();
          for (int i = 0; i < ex.size(); i++) {
              Object item = ex.get(i);
              if (item instanceof String) {
                  String str = (String)item;
                  StringBuffer buf = new StringBuffer();
                  for (int j = 0; j < str.length(); j++) {
                      char c = str.charAt(j);
                      if (c != hyphenChar) {
                          buf.append(c);
                      } else {
                          res.add(buf.toString());
                          buf.setLength(0);
                          char[] h = new char[1];
                          h[0] = hyphenChar;
                          // we use here hyphenChar which is not necessarily
                          // the one to be printed
                          res.add(new Hyphen(new String(h), null, null));
                      }
                  }
                  if (buf.length() > 0) {
                      res.add(buf.toString());
                  }
              } else {
                  res.add(item);
              }
          }
          return res;
      }
  
      protected String getExceptionWord(ArrayList ex) {
          StringBuffer res = new StringBuffer();
          for (int i = 0; i < ex.size(); i++) {
              Object item = ex.get(i);
              if (item instanceof String) {
                  res.append((String)item);
              } else {
                  if (((Hyphen)item).noBreak != null) {
                      res.append(((Hyphen)item).noBreak);
                  }
              }
          }
          return res.toString();
      }
  
      protected static String getInterletterValues(String pat) {
          StringBuffer il = new StringBuffer();
          String word = pat + "a";    // add dummy letter to serve as sentinel
          int len = word.length();
          for (int i = 0; i < len; i++) {
              char c = word.charAt(i);
              if (Character.isDigit(c)) {
                  il.append(c);
                  i++;
              } else {
                  il.append('0');
              }
          }
          return il.toString();
      }
  
      //
      // DocumentHandler methods
      //
  
      /**
       * Start element.
       */
      public void startElement(String uri, String local, String raw,
                               Attributes attrs) {
          if (local.equals("hyphen-char")) {
              String h = attrs.getValue("value");
              if (h != null && h.length() == 1) {
                  hyphenChar = h.charAt(0);
              }
          } else if (local.equals("classes")) {
              currElement = ELEM_CLASSES;
          } else if (local.equals("patterns")) {
              currElement = ELEM_PATTERNS;
          } else if (local.equals("exceptions")) {
              currElement = ELEM_EXCEPTIONS;
              exception = new ArrayList();
          } else if (local.equals("hyphen")) {
              if (token.length() > 0) {
                  exception.add(token.toString());
              }
              exception.add(new Hyphen(attrs.getValue("pre"),
                                              attrs.getValue("no"),
                                              attrs.getValue("post")));
              currElement = ELEM_HYPHEN;
          }
          token.setLength(0);
      }
  
      public void endElement(String uri, String local, String raw) {
  
          if (token.length() > 0) {
              String word = token.toString();
              switch (currElement) {
              case ELEM_CLASSES:
                  consumer.addClass(word);
                  break;
              case ELEM_EXCEPTIONS:
                  exception.add(word);
                  exception = normalizeException(exception);
                  consumer.addException(getExceptionWord(exception),
                                        (ArrayList)exception.clone());
                  break;
              case ELEM_PATTERNS:
                  consumer.addPattern(getPattern(word),
                                      getInterletterValues(word));
                  break;
              case ELEM_HYPHEN:
                  // nothing to do
                  break;
              }
              if (currElement != ELEM_HYPHEN) {
                  token.setLength(0);
              }
          }
          if (currElement == ELEM_HYPHEN) {
              currElement = ELEM_EXCEPTIONS;
          } else {
              currElement = 0;
          }
  
      }
  
      /**
       * Characters.
       */
      public void characters(char ch[], int start, int length) {
          StringBuffer chars = new StringBuffer(length);
          chars.append(ch, start, length);
          String word = readToken(chars);
          while (word != null) {
              // System.out.println("\"" + word + "\"");
              switch (currElement) {
              case ELEM_CLASSES:
                  consumer.addClass(word);
                  break;
              case ELEM_EXCEPTIONS:
                  exception.add(word);
                  exception = normalizeException(exception);
                  consumer.addException(getExceptionWord(exception),
                                        (ArrayList)exception.clone());
                  exception.clear();
                  break;
              case ELEM_PATTERNS:
                  consumer.addPattern(getPattern(word),
                                      getInterletterValues(word));
                  break;
              }
              word = readToken(chars);
          }
  
      }
  
      //
      // ErrorHandler methods
      //
  
      /**
       * Warning.
       */
      public void warning(SAXParseException ex) {
          errMsg = "[Warning] " + getLocationString(ex) + ": "
                   + ex.getMessage();
      }
  
      /**
       * Error.
       */
      public void error(SAXParseException ex) {
          errMsg = "[Error] " + getLocationString(ex) + ": " + ex.getMessage();
      }
  
      /**
       * Fatal error.
       */
      public void fatalError(SAXParseException ex) throws SAXException {
          errMsg = "[Fatal Error] " + getLocationString(ex) + ": "
                   + ex.getMessage();
          throw ex;
      }
  
      /**
       * Returns a string of the location.
       */
      private String getLocationString(SAXParseException ex) {
          StringBuffer str = new StringBuffer();
  
          String systemId = ex.getSystemId();
          if (systemId != null) {
              int index = systemId.lastIndexOf('/');
              if (index != -1) {
                  systemId = systemId.substring(index + 1);
              }
              str.append(systemId);
          }
          str.append(':');
          str.append(ex.getLineNumber());
          str.append(':');
          str.append(ex.getColumnNumber());
  
          return str.toString();
  
      }    // getLocationString(SAXParseException):String
  
  
      // PatternConsumer implementation for testing purposes
      public void addClass(String c) {
          System.out.println("class: " + c);
      }
  
      public void addException(String w, ArrayList e) {
          System.out.println("exception: " + w + " : " + e.toString());
      }
  
      public void addPattern(String p, String v) {
          System.out.println("pattern: " + p + " : " + v);
      }
  
      public static void main(String[] args) throws Exception {
          if (args.length > 0) {
              PatternParser pp = new PatternParser();
              pp.setConsumer(pp);
              pp.parse(args[0]);
          }
      }
  
  }
  
  
  
  1.1                  xml-fop/src/java/org/apache/fop/hyphenation/TernaryTree.java
  
  Index: TernaryTree.java
  ===================================================================
  /*
   * Copyright 1999-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  /* $Id: TernaryTree.java,v 1.1 2004/09/06 18:03:12 gmazza Exp $ */
  
  package org.apache.fop.hyphenation;
  
  import java.util.Enumeration;
  import java.util.Stack;
  import java.io.Serializable;
  
  /**
   * <h2>Ternary Search Tree.</h2>
   *
   * <p>A ternary search tree is a hibrid between a binary tree and
   * a digital search tree (trie). Keys are limited to strings.
   * A data value of type char is stored in each leaf node.
   * It can be used as an index (or pointer) to the data.
   * Branches that only contain one key are compressed to one node
   * by storing a pointer to the trailer substring of the key.
   * This class is intended to serve as base class or helper class
   * to implement Dictionary collections or the like. Ternary trees
   * have some nice properties as the following: the tree can be
   * traversed in sorted order, partial matches (wildcard) can be
   * implemented, retrieval of all keys within a given distance
   * from the target, etc. The storage requirements are higher than
   * a binary tree but a lot less than a trie. Performance is
   * comparable with a hash table, sometimes it outperforms a hash
   * function (most of the time can determine a miss faster than a hash).</p>
   *
   * <p>The main purpose of this java port is to serve as a base for
   * implementing TeX's hyphenation algorithm (see The TeXBook,
   * appendix H). Each language requires from 5000 to 15000 hyphenation
   * patterns which will be keys in this tree. The strings patterns
   * are usually small (from 2 to 5 characters), but each char in the
   * tree is stored in a node. Thus memory usage is the main concern.
   * We will sacrify 'elegance' to keep memory requirenments to the
   * minimum. Using java's char type as pointer (yes, I know pointer
   * it is a forbidden word in java) we can keep the size of the node
   * to be just 8 bytes (3 pointers and the data char). This gives
   * room for about 65000 nodes. In my tests the english patterns
   * took 7694 nodes and the german patterns 10055 nodes,
   * so I think we are safe.</p>
   *
   * <p>All said, this is a map with strings as keys and char as value.
   * Pretty limited!. It can be extended to a general map by
   * using the string representation of an object and using the
   * char value as an index to an array that contains the object
   * values.</p>
   *
   * @author cav@uniscope.co.jp
   */
  
  public class TernaryTree implements Cloneable, Serializable {
  
      /**
       * We use 4 arrays to represent a node. I guess I should have created
       * a proper node class, but somehow Knuth's pascal code made me forget
       * we now have a portable language with virtual memory management and
       * automatic garbage collection! And now is kind of late, furthermore,
       * if it ain't broken, don't fix it.
       */
  
      /**
       * Pointer to low branch and to rest of the key when it is
       * stored directly in this node, we don't have unions in java!
       */
      protected char[] lo;
  
      /**
       * Pointer to high branch.
       */
      protected char[] hi;
  
      /**
       * Pointer to equal branch and to data when this node is a string terminator.
       */
      protected char[] eq;
  
      /**
       * <P>The character stored in this node: splitchar.
       * Two special values are reserved:</P>
       * <ul><li>0x0000 as string terminator</li>
       * <li>0xFFFF to indicate that the branch starting at
       * this node is compressed</li></ul>
       * <p>This shouldn't be a problem if we give the usual semantics to
       * strings since 0xFFFF is garanteed not to be an Unicode character.</p>
       */
      protected char[] sc;
  
      /**
       * This vector holds the trailing of the keys when the branch is compressed.
       */
      protected CharVector kv;
  
      protected char root;
      protected char freenode;
      protected int length;    // number of items in tree
  
      protected static final int BLOCK_SIZE = 2048;    // allocation size for arrays
  
      TernaryTree() {
          init();
      }
  
      protected void init() {
          root = 0;
          freenode = 1;
          length = 0;
          lo = new char[BLOCK_SIZE];
          hi = new char[BLOCK_SIZE];
          eq = new char[BLOCK_SIZE];
          sc = new char[BLOCK_SIZE];
          kv = new CharVector();
      }
  
      /**
       * Branches are initially compressed, needing
       * one node per key plus the size of the string
       * key. They are decompressed as needed when
       * another key with same prefix
       * is inserted. This saves a lot of space,
       * specially for long keys.
       */
      public void insert(String key, char val) {
          // make sure we have enough room in the arrays
          int len = key.length()
                    + 1;    // maximum number of nodes that may be generated
          if (freenode + len > eq.length) {
              redimNodeArrays(eq.length + BLOCK_SIZE);
          }
          char strkey[] = new char[len--];
          key.getChars(0, len, strkey, 0);
          strkey[len] = 0;
          root = insert(root, strkey, 0, val);
      }
  
      public void insert(char[] key, int start, char val) {
          int len = strlen(key) + 1;
          if (freenode + len > eq.length) {
              redimNodeArrays(eq.length + BLOCK_SIZE);
          }
          root = insert(root, key, start, val);
      }
  
      /**
       * The actual insertion function, recursive version.
       */
      private char insert(char p, char[] key, int start, char val) {
          int len = strlen(key, start);
          if (p == 0) {
              // this means there is no branch, this node will start a new branch.
              // Instead of doing that, we store the key somewhere else and create
              // only one node with a pointer to the key
              p = freenode++;
              eq[p] = val;           // holds data
              length++;
              hi[p] = 0;
              if (len > 0) {
                  sc[p] = 0xFFFF;    // indicates branch is compressed
                  lo[p] = (char)kv.alloc(len
                                         + 1);    // use 'lo' to hold pointer to key
                  strcpy(kv.getArray(), lo[p], key, start);
              } else {
                  sc[p] = 0;
                  lo[p] = 0;
              }
              return p;
          }
  
          if (sc[p] == 0xFFFF) {
              // branch is compressed: need to decompress
              // this will generate garbage in the external key array
              // but we can do some garbage collection later
              char pp = freenode++;
              lo[pp] = lo[p];    // previous pointer to key
              eq[pp] = eq[p];    // previous pointer to data
              lo[p] = 0;
              if (len > 0) {
                  sc[p] = kv.get(lo[pp]);
                  eq[p] = pp;
                  lo[pp]++;
                  if (kv.get(lo[pp]) == 0) {
                      // key completly decompressed leaving garbage in key array
                      lo[pp] = 0;
                      sc[pp] = 0;
                      hi[pp] = 0;
                  } else {
                      // we only got first char of key, rest is still there
                      sc[pp] = 0xFFFF;
                  }
              } else {
                  // In this case we can save a node by swapping the new node
                  // with the compressed node
                  sc[pp] = 0xFFFF;
                  hi[p] = pp;
                  sc[p] = 0;
                  eq[p] = val;
                  length++;
                  return p;
              }
          }
          char s = key[start];
          if (s < sc[p]) {
              lo[p] = insert(lo[p], key, start, val);
          } else if (s == sc[p]) {
              if (s != 0) {
                  eq[p] = insert(eq[p], key, start + 1, val);
              } else {
                  // key already in tree, overwrite data
                  eq[p] = val;
              }
          } else {
              hi[p] = insert(hi[p], key, start, val);
          }
          return p;
      }
  
      /**
       * Compares 2 null terminated char arrays
       */
      public static int strcmp(char[] a, int startA, char[] b, int startB) {
          for (; a[startA] == b[startB]; startA++, startB++) {
              if (a[startA] == 0) {
                  return 0;
              }
          }
          return a[startA] - b[startB];
      }
  
      /**
       * Compares a string with null terminated char array
       */
      public static int strcmp(String str, char[] a, int start) {
          int i, d, len = str.length();
          for (i = 0; i < len; i++) {
              d = (int)str.charAt(i) - a[start + i];
              if (d != 0) {
                  return d;
              }
              if (a[start + i] == 0) {
                  return d;
              }
          }
          if (a[start + i] != 0) {
              return (int)-a[start + i];
          }
          return 0;
  
      }
  
      public static void strcpy(char[] dst, int di, char[] src, int si) {
          while (src[si] != 0) {
              dst[di++] = src[si++];
          }
          dst[di] = 0;
      }
  
      public static int strlen(char[] a, int start) {
          int len = 0;
          for (int i = start; i < a.length && a[i] != 0; i++) {
              len++;
          }
          return len;
      }
  
      public static int strlen(char[] a) {
          return strlen(a, 0);
      }
  
      public int find(String key) {
          int len = key.length();
          char strkey[] = new char[len + 1];
          key.getChars(0, len, strkey, 0);
          strkey[len] = 0;
  
          return find(strkey, 0);
      }
  
      public int find(char[] key, int start) {
          int d;
          char p = root;
          int i = start;
          char c;
  
          while (p != 0) {
              if (sc[p] == 0xFFFF) {
                  if (strcmp(key, i, kv.getArray(), lo[p]) == 0) {
                      return eq[p];
                  } else {
                      return -1;
                  }
              }
              c = key[i];
              d = c - sc[p];
              if (d == 0) {
                  if (c == 0) {
                      return eq[p];
                  }
                  i++;
                  p = eq[p];
              } else if (d < 0) {
                  p = lo[p];
              } else {
                  p = hi[p];
              }
          }
          return -1;
      }
  
      public boolean knows(String key) {
          return (find(key) >= 0);
      }
  
      // redimension the arrays
      private void redimNodeArrays(int newsize) {
          int len = newsize < lo.length ? newsize : lo.length;
          char[] na = new char[newsize];
          System.arraycopy(lo, 0, na, 0, len);
          lo = na;
          na = new char[newsize];
          System.arraycopy(hi, 0, na, 0, len);
          hi = na;
          na = new char[newsize];
          System.arraycopy(eq, 0, na, 0, len);
          eq = na;
          na = new char[newsize];
          System.arraycopy(sc, 0, na, 0, len);
          sc = na;
      }
  
      public int size() {
          return length;
      }
  
      public Object clone() {
          TernaryTree t = new TernaryTree();
          t.lo = (char[])this.lo.clone();
          t.hi = (char[])this.hi.clone();
          t.eq = (char[])this.eq.clone();
          t.sc = (char[])this.sc.clone();
          t.kv = (CharVector)this.kv.clone();
          t.root = this.root;
          t.freenode = this.freenode;
          t.length = this.length;
  
          return t;
      }
  
      /**
       * Recursively insert the median first and then the median of the
       * lower and upper halves, and so on in order to get a balanced
       * tree. The array of keys is assumed to be sorted in ascending
       * order.
       */
      protected void insertBalanced(String[] k, char[] v, int offset, int n) {
          int m;
          if (n < 1) {
              return;
          }
          m = n >> 1;
  
          insert(k[m + offset], v[m + offset]);
          insertBalanced(k, v, offset, m);
  
          insertBalanced(k, v, offset + m + 1, n - m - 1);
      }
  
  
      /**
       * Balance the tree for best search performance
       */
      public void balance() {
          // System.out.print("Before root splitchar = "); System.out.println(sc[root]);
  
          int i = 0, n = length;
          String[] k = new String[n];
          char[] v = new char[n];
          Iterator iter = new Iterator();
          while (iter.hasMoreElements()) {
              v[i] = iter.getValue();
              k[i++] = (String)iter.nextElement();
          }
          init();
          insertBalanced(k, v, 0, n);
  
          // With uniform letter distribution sc[root] should be around 'm'
          // System.out.print("After root splitchar = "); System.out.println(sc[root]);
      }
  
      /**
       * Each node stores a character (splitchar) which is part of
       * some key(s). In a compressed branch (one that only contain
       * a single string key) the trailer of the key which is not
       * already in nodes is stored  externally in the kv array.
       * As items are inserted, key substrings decrease.
       * Some substrings may completely  disappear when the whole
       * branch is totally decompressed.
       * The tree is traversed to find the key substrings actually
       * used. In addition, duplicate substrings are removed using
       * a map (implemented with a TernaryTree!).
       *
       */
      public void trimToSize() {
          // first balance the tree for best performance
          balance();
  
          // redimension the node arrays
          redimNodeArrays(freenode);
  
          // ok, compact kv array
          CharVector kx = new CharVector();
          kx.alloc(1);
          TernaryTree map = new TernaryTree();
          compact(kx, map, root);
          kv = kx;
          kv.trimToSize();
      }
  
      private void compact(CharVector kx, TernaryTree map, char p) {
          int k;
          if (p == 0) {
              return;
          }
          if (sc[p] == 0xFFFF) {
              k = map.find(kv.getArray(), lo[p]);
              if (k < 0) {
                  k = kx.alloc(strlen(kv.getArray(), lo[p]) + 1);
                  strcpy(kx.getArray(), k, kv.getArray(), lo[p]);
                  map.insert(kx.getArray(), k, (char)k);
              }
              lo[p] = (char)k;
          } else {
              compact(kx, map, lo[p]);
              if (sc[p] != 0) {
                  compact(kx, map, eq[p]);
              }
              compact(kx, map, hi[p]);
          }
      }
  
  
      public Enumeration keys() {
          return new Iterator();
      }
  
      public class Iterator implements Enumeration {
  
          /**
           * current node index
           */
          int cur;
  
          /**
           * current key
           */
          String curkey;
  
          private class Item implements Cloneable {
              char parent;
              char child;
  
              public Item() {
                  parent = 0;
                  child = 0;
              }
  
              public Item(char p, char c) {
                  parent = p;
                  child = c;
              }
  
              public Object clone() {
                  return new Item(parent, child);
              }
  
          }
  
          /**
           * Node stack
           */
          Stack ns;
  
          /**
           * key stack implemented with a StringBuffer
           */
          StringBuffer ks;
  
          public Iterator() {
              cur = -1;
              ns = new Stack();
              ks = new StringBuffer();
              rewind();
          }
  
          public void rewind() {
              ns.removeAllElements();
              ks.setLength(0);
              cur = root;
              run();
          }
  
          public Object nextElement() {
              String res = new String(curkey);
              cur = up();
              run();
              return res;
          }
  
          public char getValue() {
              if (cur >= 0) {
                  return eq[cur];
              }
              return 0;
          }
  
          public boolean hasMoreElements() {
              return (cur != -1);
          }
  
          /**
           * traverse upwards
           */
          private int up() {
              Item i = new Item();
              int res = 0;
  
              if (ns.empty()) {
                  return -1;
              }
  
              if (cur != 0 && sc[cur] == 0) {
                  return lo[cur];
              }
  
              boolean climb = true;
  
              while (climb) {
                  i = (Item)ns.pop();
                  i.child++;
                  switch (i.child) {
                  case 1:
                      if (sc[i.parent] != 0) {
                          res = eq[i.parent];
                          ns.push(i.clone());
                          ks.append(sc[i.parent]);
                      } else {
                          i.child++;
                          ns.push(i.clone());
                          res = hi[i.parent];
                      }
                      climb = false;
                      break;
  
                  case 2:
                      res = hi[i.parent];
                      ns.push(i.clone());
                      if (ks.length() > 0) {
                          ks.setLength(ks.length() - 1);    // pop
                      }
                      climb = false;
                      break;
  
                  default:
                      if (ns.empty()) {
                          return -1;
                      }
                      climb = true;
                      break;
                  }
              }
              return res;
          }
  
          /**
           * traverse the tree to find next key
           */
          private int run() {
              if (cur == -1) {
                  return -1;
              }
  
              boolean leaf = false;
              while (true) {
                  // first go down on low branch until leaf or compressed branch
                  while (cur != 0) {
                      if (sc[cur] == 0xFFFF) {
                          leaf = true;
                          break;
                      }
                      ns.push(new Item((char)cur, '\u0000'));
                      if (sc[cur] == 0) {
                          leaf = true;
                          break;
                      }
                      cur = lo[cur];
                  }
                  if (leaf) {
                      break;
                  }
                  // nothing found, go up one node and try again
                  cur = up();
                  if (cur == -1) {
                      return -1;
                  }
              }
              // The current node should be a data node and
              // the key should be in the key stack (at least partially)
              StringBuffer buf = new StringBuffer(ks.toString());
              if (sc[cur] == 0xFFFF) {
                  int p = lo[cur];
                  while (kv.get(p) != 0) {
                      buf.append(kv.get(p++));
                  }
              }
              curkey = buf.toString();
              return 0;
          }
  
      }
  
      public void printStats() {
          System.out.println("Number of keys = " + Integer.toString(length));
          System.out.println("Node count = " + Integer.toString(freenode));
          // System.out.println("Array length = " + Integer.toString(eq.length));
          System.out.println("Key Array length = "
                             + Integer.toString(kv.length()));
  
          /*
           * for(int i=0; i<kv.length(); i++)
           * if ( kv.get(i) != 0 )
           * System.out.print(kv.get(i));
           * else
           * System.out.println("");
           * System.out.println("Keys:");
           * for(Enumeration enum = keys(); enum.hasMoreElements(); )
           * System.out.println(enum.nextElement());
           */
  
      }
  
      public static void main(String[] args) throws Exception {
          TernaryTree tt = new TernaryTree();
          tt.insert("Carlos", 'C');
          tt.insert("Car", 'r');
          tt.insert("palos", 'l');
          tt.insert("pa", 'p');
          tt.trimToSize();
          System.out.println((char)tt.find("Car"));
          System.out.println((char)tt.find("Carlos"));
          System.out.println((char)tt.find("alto"));
          tt.printStats();
      }
  
  }
  
  
  
  
  1.26      +2 -2      xml-fop/src/java/org/apache/fop/layoutmgr/LineLayoutManager.java
  
  Index: LineLayoutManager.java
  ===================================================================
  RCS file: /home/cvs/xml-fop/src/java/org/apache/fop/layoutmgr/LineLayoutManager.java,v
  retrieving revision 1.25
  retrieving revision 1.26
  diff -u -r1.25 -r1.26
  --- LineLayoutManager.java	5 Sep 2004 18:16:32 -0000	1.25
  +++ LineLayoutManager.java	6 Sep 2004 18:03:12 -0000	1.26
  @@ -24,8 +24,8 @@
   import org.apache.fop.fo.Constants;
   import org.apache.fop.fo.properties.CommonMarginBlock;
   import org.apache.fop.fo.properties.CommonHyphenation;
  -import org.apache.fop.layout.hyphenation.Hyphenation;
  -import org.apache.fop.layout.hyphenation.Hyphenator;
  +import org.apache.fop.hyphenation.Hyphenation;
  +import org.apache.fop.hyphenation.Hyphenator;
   import org.apache.fop.traits.BlockProps;
   import org.apache.fop.area.LineArea;
   import org.apache.fop.area.Resolveable;
  
  
  
  1.3       +2 -2      xml-fop/src/java/org/apache/fop/tools/anttasks/SerializeHyphPattern.java
  
  Index: SerializeHyphPattern.java
  ===================================================================
  RCS file: /home/cvs/xml-fop/src/java/org/apache/fop/tools/anttasks/SerializeHyphPattern.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- SerializeHyphPattern.java	27 Feb 2004 17:56:04 -0000	1.2
  +++ SerializeHyphPattern.java	6 Sep 2004 18:03:12 -0000	1.3
  @@ -28,8 +28,8 @@
   import org.apache.tools.ant.DirectoryScanner;
   
   // FOP
  -import org.apache.fop.layout.hyphenation.HyphenationTree;
  -import org.apache.fop.layout.hyphenation.HyphenationException;
  +import org.apache.fop.hyphenation.HyphenationTree;
  +import org.apache.fop.hyphenation.HyphenationException;
   
   /**
    * SerializeHyphPattern
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: fop-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: fop-cvs-help@xml.apache.org