You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by bu...@apache.org on 2004/02/27 01:00:54 UTC
cvs commit: jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test TestAtom.java TestAtom.xml
burton 2004/02/26 16:00:54
Modified: feedparser TODO
feedparser/src/java/org/apache/commons/feedparser
AtomFeedParser.java ContentFeedParserListener.java
DefaultFeedParserListener.java FeedParserState.java
feedparser/src/java/org/apache/commons/feedparser/test
TestAtom.java TestAtom.xml
Log:
support for content and links
Revision Changes Path
1.5 +3 -1 jakarta-commons-sandbox/feedparser/TODO
Index: TODO
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/TODO,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- TODO 26 Feb 2004 23:16:28 -0000 1.4
+++ TODO 27 Feb 2004 00:00:54 -0000 1.5
@@ -35,4 +35,6 @@
-
- content type exposed...
- - http://intertwingly.net/wiki/pie/content
\ No newline at end of file
+ - http://intertwingly.net/wiki/pie/content
+
+- Support for RSS 1.0 mod_link and Atom links
\ No newline at end of file
1.3 +86 -1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/AtomFeedParser.java
Index: AtomFeedParser.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/AtomFeedParser.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- AtomFeedParser.java 26 Feb 2004 23:16:28 -0000 1.2
+++ AtomFeedParser.java 27 Feb 2004 00:00:54 -0000 1.3
@@ -150,11 +150,96 @@
listener.onItem( state, title, link, description, link );
doMeta( state, listener, child );
+
+ doContent( state, listener, child );
listener.onItemEnd();
}
+ }
+
+ private static void doContent( FeedParserState state,
+ FeedParserListener listener,
+ Element current ) throws Exception {
+
+ if ( ! (listener instanceof ContentFeedParserListener) )
+ return;
+
+ ContentFeedParserListener clistener = (ContentFeedParserListener)listener;
+
+ XPath xpath = new XPath( "atom:content" );
+ xpath.setNamespaceContext( NS.context );
+
+ List items = xpath.selectNodes( current );
+
+ Iterator i = items.iterator();
+
+ //update items.
+ while ( i.hasNext() ) {
+
+ Element content = (Element)i.next();
+
+ String type = content.getAttributeValue( "type", "text/plain" );
+ String mode = content.getAttributeValue( "mode", "xml" );
+
+ String format = null;
+ String encoding = null;
+
+ String value = null;
+
+ //
+ if ( mode.equals( "xml" ) ) {
+ value = content.getText();
+ }
+
+ if ( mode.equals( "escaped" ) ) {
+ value = getXMLOfContent( content.getContent() );
+ }
+
+ clistener.onContent( state, type, format, encoding, mode, value );
+
+ }
+
+ }
+
+ private static String getXMLOfContent( Element element ) {
+ return getXMLOfContent( element.getContent() );
+ }
+
+ /**
+ * Get the content of the given element.
+ *
+ * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
+ */
+ private static String getXMLOfContent( List content ) {
+
+ StringBuffer buff = new StringBuffer( 1000 );
+
+ XMLOutputter outputter = new XMLOutputter( " ", true );
+
+ Iterator it = content.iterator();
+
+ while ( it.hasNext() ) {
+
+ Object next = it.next();
+
+ if ( next instanceof String ) {
+ buff.append( (String)next );
+ } else if ( next instanceof Element ) {
+ buff.append( outputter.outputString( (Element)next ) );
+ } else if ( next instanceof CDATA ) {
+ buff.append( outputter.outputString( (CDATA)next ) );
+ } else if ( next instanceof Comment ) {
+ buff.append( outputter.outputString( (Comment)next ) );
+ } else if ( next instanceof Text ) {
+ buff.append( outputter.outputString( (Text)next ) );
+ }
+
+ }
+
+ return buff.toString();
+
}
private static void doMeta( FeedParserState state,
1.2 +5 -2 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/ContentFeedParserListener.java
Index: ContentFeedParserListener.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/ContentFeedParserListener.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- ContentFeedParserListener.java 26 Feb 2004 23:16:28 -0000 1.1
+++ ContentFeedParserListener.java 27 Feb 2004 00:00:54 -0000 1.2
@@ -107,7 +107,10 @@
* content:item. An encoding is a reversable method of including content within
* the RSS file.
*
- * @param value String value of the found content.
+ * @param value String value of the found content. If this is Base64
+ * encoded content we do NOT decode the value; the encoded text is returned
+ * as-is. This is done because the decoded content might be binary, and
+ * returning that as a string would be invalid.
*
* @author <a href="mailto:burton@peerfear.org">Kevin Burton</a>
*/
1.4 +18 -3 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/DefaultFeedParserListener.java
Index: DefaultFeedParserListener.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/DefaultFeedParserListener.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- DefaultFeedParserListener.java 26 Feb 2004 23:16:28 -0000 1.3
+++ DefaultFeedParserListener.java 27 Feb 2004 00:00:54 -0000 1.4
@@ -68,7 +68,11 @@
* @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
* @version $Id$
*/
-public abstract class DefaultFeedParserListener implements FeedParserListener, MetaFeedParserListener {
+public abstract class DefaultFeedParserListener implements FeedParserListener,
+ MetaFeedParserListener,
+ ModContentFeedParserListener,
+ XHTMLFeedParserListener,
+ ContentFeedParserListener {
private Object context = null;
@@ -150,5 +154,16 @@
public void onXHTMLBodyEnd() throws FeedParserException {}
public void onFeedVersion( FeedVersion version ) throws FeedParserException {}
-
+
+ // **** ContentFeedParserListener *******************************************
+
+ public void onContent( FeedParserState state,
+ String type,
+ String format,
+ String encoding,
+ String mode,
+ String value ) throws FeedParserException {}
+
+ public void onContent() throws FeedParserException {}
+
}
1.2 +11 -1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/FeedParserState.java
Index: FeedParserState.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/FeedParserState.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- FeedParserState.java 18 Feb 2004 20:05:20 -0000 1.1
+++ FeedParserState.java 27 Feb 2004 00:00:54 -0000 1.2
@@ -67,6 +67,16 @@
Element current = null;
+ /**
+ * xml:lang
+ */
+ public String lang;
+
+ /**
+ * xml:base
+ */
+ public String base;
+
public FeedParserState() { }
public FeedParserState( Element current ) {
1.3 +14 -1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestAtom.java
Index: TestAtom.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestAtom.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- TestAtom.java 18 Feb 2004 21:07:23 -0000 1.2
+++ TestAtom.java 27 Feb 2004 00:00:54 -0000 1.3
@@ -120,6 +120,19 @@
}
+ public void onContent( FeedParserState state,
+ String type,
+ String format,
+ String encoding,
+ String mode,
+ String value ) throws FeedParserException {
+
+ System.out.println( "content (type): " + type );
+ System.out.println( "content (mode): " + mode );
+ System.out.println( "content (value): " + value );
+
+ }
+
};
listener.setContext( this );
1.2 +1 -1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestAtom.xml
Index: TestAtom.xml
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestAtom.xml,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- TestAtom.xml 18 Feb 2004 20:06:27 -0000 1.1
+++ TestAtom.xml 27 Feb 2004 00:00:54 -0000 1.2
@@ -116,7 +116,7 @@
<p>It also occurs to me that people who say parsing HTML is too hard probably aren't <a href="http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely">sanitizing embedded HTML properly</a>. <a href="http://www.alpha-geek.com/2004/01/02/example_of_hard_to_parse_html.html">Do not parse HTML with regexes</a>. Here's an <a href="http://diveintomark.org/public/2004/01/crazy_rss.xml">example feed that illustrates the problem</a>. My <a href="http://diveintomark.org/projects/feed_parser/version_27.html">Ultraliberal Feed Parser 2.7</a> sanitizes this correctly, and in the true spirit of hacking, I have no sympathy for people who can't be bothered to write code I've already written.</p>
-<p>Another entire class of unhelpful suggestions that seems to pop up on a regular basis is unproductive mandates about how producers can produce Atom feeds, or how clients can consume them. Things like <q>let's mandate that feeds can't use CDATA blocks</q> (runs contrary to the XML specification), or <q>let's mandate that feeds can't contain processing instructions</q> (technically possible, but to what purpose?), or <q>let's mandate that clients can only consume feeds with conforming XML parsers</q>.</p>
+<p>Another entire class of unhelpful suggestions that seems to pop up on a regular basis is unproductive mandates about how producers can produce Atom feeds, or how clients can consume them. Things like <q>let's mandate that feeds can't use blocks</q> (runs contrary to the XML specification), or <q>let's mandate that feeds can't contain processing instructions</q> (technically possible, but to what purpose?), or <q>let's mandate that clients can only consume feeds with conforming XML parsers</q>.</p>
<p>This last one is interesting, in that it tries to wish away Postel's Law (originally stated in <a href="http://www.ibiblio.org/pub/docs/rfc/rfc793.txt">RFC 793</a> as <q cite="http://www.ibiblio.org/pub/docs/rfc/rfc793.txt">be conservative in what you do, be liberal in what you accept from others</q>). Various people have tried to mandate this principle out of existence, some going so far as to claim that <a href="http://essaysfromexodus.scripting.com/postelsLaw">Postel's Law should not apply to XML</a>, because (apparently) the three letters <q>X</q>, <q>M</q>, and <q>L</q> are a magical combination that signal a glorious revolution that somehow overturns the fundamental principles of interoperability.</p>
---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org