You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@poi.apache.org by kl...@apache.org on 2005/04/01 18:58:00 UTC

cvs commit: jakarta-poi/src/java/org/apache/poi/hpsf MutableSection.java Property.java

klute       2005/04/01 08:58:00

  Modified:    src/java/org/apache/poi/hpsf MutableSection.java
                        Property.java
  Log:
  Bug 34247 fixed. Dictionaries are read with the specified codepage now.
  
  Revision  Changes    Path
  1.14      +15 -11    jakarta-poi/src/java/org/apache/poi/hpsf/MutableSection.java
  
  Index: MutableSection.java
  ===================================================================
  RCS file: /home/cvs/jakarta-poi/src/java/org/apache/poi/hpsf/MutableSection.java,v
  retrieving revision 1.13
  retrieving revision 1.14
  diff -u -r1.13 -r1.14
  --- MutableSection.java	31 Aug 2004 20:47:09 -0000	1.13
  +++ MutableSection.java	1 Apr 2005 16:58:00 -0000	1.14
  @@ -356,8 +356,7 @@
                       getPropertyCount() * 2 * LittleEndian.INT_SIZE;
   
           /* Writing the section's dictionary it tricky. If there is a dictionary
  -         * (property 0) the codepage property (property 1) has to be set, too.
  -         * Since HPSF supports Unicode only, the codepage must be 1200. */
  +         * (property 0) the codepage property (property 1) must be set, too. */
           int codepage = -1;
           if (getProperty(PropertyIDMap.PID_DICTIONARY) != null)
           {
  @@ -370,9 +369,11 @@
                            "Integer object.");
               }
               else
  -                throw new IllegalPropertySetDataException
  -                    ("The codepage property (ID = 1) must be set if the " +
  -                     "section contains a dictionary.");
  +                /* Warning: The codepage property is not set although a
  +                 * dictionary is present. In order to cope with this problem we
  +                 * add the codepage property and set it to Unicode. */
  +                setProperty(PropertyIDMap.PID_CODEPAGE, (long) Variant.VT_I2,
  +                            new Integer(Constants.CP_UNICODE));
               codepage = getCodepage();
           }
   
  @@ -594,11 +595,14 @@
                * don't have a type. */
               setProperty(PropertyIDMap.PID_DICTIONARY, -1, dictionary);
   
  -            /* Set the codepage property (ID 1) for the strings used in the 
  -             * dictionary. HPSF always writes Unicode strings to the
  -             * dictionary. */
  -            setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2,
  -                        new Integer(Constants.CP_UNICODE));
  +            /* If the codepage property (ID 1) for the strings (keys and
  +             * values) used in the dictionary is not yet defined, set it to
  +             * Unicode. */
  +            final Integer codepage =
  +                (Integer) getProperty(PropertyIDMap.PID_CODEPAGE);
  +            if (codepage == null)
  +                setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2,
  +                            new Integer(Constants.CP_UNICODE));
           }
           else
               /* Setting the dictionary to null means to remove property 0.
  
  
  
  1.21      +46 -21    jakarta-poi/src/java/org/apache/poi/hpsf/Property.java
  
  Index: Property.java
  ===================================================================
  RCS file: /home/cvs/jakarta-poi/src/java/org/apache/poi/hpsf/Property.java,v
  retrieving revision 1.20
  retrieving revision 1.21
  diff -u -r1.20 -r1.21
  --- Property.java	31 Aug 2004 20:45:00 -0000	1.20
  +++ Property.java	1 Apr 2005 16:58:00 -0000	1.21
  @@ -1,4 +1,3 @@
  -
   /* ====================================================================
      Copyright 2002-2004   Apache Software Foundation
   
  @@ -44,6 +43,10 @@
    * over time but largely depends on your feedback so that the POI team knows
    * which variant types are really needed. So please feel free to submit error
    * reports or patches for the types you need.</p>
  + * 
  + * <p>Microsoft documentation: <a
  + * href="http://msdn.microsoft.com/library/en-us/stg/stg/property_set_display_name_dictionary.asp?frame=true">
  + * Property Set Display Name Dictionary</a>.</p>
    *
    * @author Rainer Klute <a
    * href="mailto:klute@rainer-klute.de">&lt;klute@rainer-klute.de&gt;</a>
  @@ -162,17 +165,19 @@
   
       /**
        * <p>Reads a dictionary.</p>
  -     *
  -     * @param src The byte array containing the bytes making out the
  -     * dictionary.
  -     * @param offset At this offset within <var>src</var> the
  -     * dictionary starts.
  +     * 
  +     * @param src The byte array containing the bytes making out the dictionary.
  +     * @param offset At this offset within <var>src</var> the dictionary
  +     *        starts.
        * @param length The dictionary contains at most this many bytes.
        * @param codepage The codepage of the string values.
        * @return The dictonary
  +     * @throws UnsupportedEncodingException if the dictionary's codepage is not
  +     *         (yet) supported.
        */
       protected Map readDictionary(final byte[] src, final long offset,
                                    final int length, final int codepage)
  +    throws UnsupportedEncodingException
       {
           /* Check whether "offset" points into the "src" array". */
           if (offset < 0 || offset > src.length)
  @@ -195,25 +200,45 @@
               o += LittleEndian.INT_SIZE;
   
               /* The value (a string). The length is the either the
  -             * number of characters if the character set is Unicode or
  -             * else the number of bytes. The length includes
  -             * terminating 0x00 bytes which we have to strip off to
  -             * create a Java string. */
  +             * number of (two-byte) characters if the character set is Unicode
  +             * or the number of bytes if the character set is not Unicode.
  +             * The length includes terminating 0x00 bytes which we have to strip
  +             * off to create a Java string. */
               long sLength = LittleEndian.getUInt(src, o);
               o += LittleEndian.INT_SIZE;
   
  -            /* Read the bytes or characters depending on whether the
  -             * character set is Unicode or not. */
  -            StringBuffer b = new StringBuffer((int) sLength);
  -            for (int j = 0; j < sLength; j++)
  -                if (codepage == Constants.CP_UNICODE)
  +            /* Read the string. */
  +            final StringBuffer b = new StringBuffer();
  +            switch (codepage)
  +            {
  +                case -1:
                   {
  -                    final int i1 = o + (j * 2);
  -                    final int i2 = i1 + 1;
  -                    b.append((char) ((src[i2] << 8) + src[i1]));
  +                    /* Without a codepage the length is equal to the number of
  +                     * bytes. */
  +                    b.append(new String(src, o, (int) sLength));
  +                    break;
                   }
  -                else
  -                    b.append((char) src[o + j]);
  +                case Constants.CP_UNICODE:
  +                {
  +                    /* The length is the number of characters, i.e. the number
  +                     * of bytes is twice the number of the characters. */
  +                    for (int j = 0; j < sLength; j++)
  +                    {
  +                        final int i1 = o + (j * 2);
  +                        final int i2 = i1 + 1;
  +                        b.append((char) ((src[i2] << 8) + src[i1]));
  +                    }
  +                    break;
  +                }
  +                default:
  +                {
  +                    /* For encodings other than Unicode the length is the number
  +                     * of bytes. */
  +                    b.append(new String(src, o, (int) sLength,
  +                             VariantSupport.codepageToEncoding(codepage)));
  +                    break;
  +                }
  +            }
   
               /* Strip 0x00 characters from the end of the string: */
               while (b.length() > 0 && b.charAt(b.length() - 1) == 0x00)
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: poi-dev-unsubscribe@jakarta.apache.org
Mailing List:    http://jakarta.apache.org/site/mail2.html#poi
The Apache Jakarta POI Project: http://jakarta.apache.org/poi/