You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@abdera.apache.org by "Jon Hewitt (JIRA)" <ji...@apache.org> on 2009/06/16 20:15:07 UTC

[jira] Created: (ABDERA-243) Extra characters generated for entities in XHTML

Extra characters generated for entities in XHTML
------------------------------------------------

                 Key: ABDERA-243
                 URL: https://issues.apache.org/jira/browse/ABDERA-243
             Project: Abdera
          Issue Type: Bug
    Affects Versions: 0.4.0
         Environment: Windows XP
            Reporter: Jon Hewitt


Run the following program. Note that the entities (&#x200; and &#x201;) are translated to the correct characters, but an additional character (the euro sign and '?', respectively) follows each correct character:

Output:

     <p xmlns="http://www.w3.org/2005/Atom">This is simple XHTML È€È? contained within a xhtml:div tag.</p>    


import java.io.ByteArrayInputStream;
import java.io.InputStream;

import org.apache.abdera.Abdera;
import org.apache.abdera.model.Document;
import org.apache.abdera.model.Entry;

/**
 * Stand-alone reproduction for ABDERA-243.
 *
 * Feeds a fixed Atom entry, whose XHTML content holds two hexadecimal
 * character references, through a parser obtained from
 * Abdera.getNewParser() and prints the entry's content to standard output.
 */
public class Parser {

	/**
	 * The Atom entry under test. The paragraph inside the xhtml:div carries
	 * the character references for U+0200 and U+0201 back to back.
	 */
	private static final String ATOM_TEXT =
			"<entry xmlns=\"http://www.w3.org/2005/Atom\"> "
			+ "  <title>Item</title> "
			+ "    <content type=\"xhtml\" xmlns:xhtml = \"http://www.w3.org/1999/xhtml\" > "
			+ "   <xhtml:div> "
			+ "    <p>This is simple XHTML &#x200;&#x201; contained within a xhtml:div tag.</p> "
			+ "   </xhtml:div> "
			+ "  </content> "
			+ "</entry>";

	/**
	 * Parses {@link #ATOM_TEXT} and prints the resulting entry's content.
	 *
	 * Any failure along the way is reported by dumping its stack trace
	 * rather than being propagated.
	 *
	 * @param args command-line arguments; not read
	 */
	public static void main(String[] args) {
		try {
			// The payload is handed to the parser as UTF-8 encoded bytes.
			byte[] utf8Payload = ATOM_TEXT.getBytes("UTF-8");
			InputStream source = new ByteArrayInputStream(utf8Payload);

			Document<Entry> parsed = Abdera.getNewParser().parse(source);
			Entry root = parsed.getRoot();

			System.out.println(root.getContent());
		} catch (Throwable failure) {
			failure.printStackTrace();
		}
	}
}

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.