You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by bu...@apache.org on 2005/01/23 10:49:50 UTC
cvs commit: jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate ProbeLocator.java
burton 2005/01/23 01:49:50
Modified: feedparser/src/java/org/apache/commons/feedparser
AtomFeedParser.java FeedFilter.java FeedParser.java
MetaFeedParser.java NS.java RSSFeedParser.java
feedparser/src/java/org/apache/commons/feedparser/impl
DebugFeedParserListener.java
feedparser/src/java/org/apache/commons/feedparser/locate
ProbeLocator.java
Log:
support for tags
Revision Changes Path
1.17 +2 -1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/AtomFeedParser.java
Index: AtomFeedParser.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/AtomFeedParser.java,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -r1.16 -r1.17
--- AtomFeedParser.java 19 Jan 2005 06:22:15 -0000 1.16
+++ AtomFeedParser.java 23 Jan 2005 09:49:50 -0000 1.17
@@ -180,6 +180,7 @@
doContent( state, listener, child );
MetaFeedParser.parse( listener, state );
+ TagFeedParser.parse( listener, state );
listener.onItemEnd();
doLocale( state, listener, child );
1.7 +27 -1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/FeedFilter.java
Index: FeedFilter.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/FeedFilter.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- FeedFilter.java 20 Jan 2005 09:00:37 -0000 1.6
+++ FeedFilter.java 23 Jan 2005 09:49:50 -0000 1.7
@@ -21,6 +21,8 @@
import java.util.*;
import java.util.regex.*;
+import org.apache.log4j.Logger;
+
/**
*
* @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
@@ -28,6 +30,8 @@
*/
public class FeedFilter {
+ private static Logger log = Logger.getLogger( FeedFilter.class );
+
public static boolean DO_REMOVE_LEADING_PROLOG = true;
public static boolean DO_DECODE_ENTITIES = true;
@@ -54,6 +58,8 @@
public static byte[] parse( String content, String encoding )
throws Exception {
+ //FIXME: return an object here so that I can flag a bozo bit.
+
//remove leading prolog...
if ( DO_REMOVE_LEADING_PROLOG )
content = doRemoveLeadingProlog( content, encoding );
@@ -87,6 +93,7 @@
if ( begin > 0 ) {
content = content.substring( begin, content.length() );
+ log.warn( "Skipped whitespace in prolog and moved towards first element." );
}
//now skip to the XML processing instruction when necessary. This is
@@ -96,6 +103,7 @@
if ( begin > 0 ) {
content = content.substring( begin, content.length() );
+ log.warn( "Removed prolog towards first processing instruction." );
}
content = doRemoveElementProlog( content );
@@ -147,6 +155,9 @@
int begin = 0;
+ boolean hasFilterDecodedEntities = false;
+ boolean hasFilterFoundUnknownEntity = false;
+
while ( m.find() ) {
buff.append( content.substring( begin, m.start() ) );
@@ -159,10 +170,19 @@
buff.append( "&#" );
buff.append( value );
buff.append( ";" );
+
+ hasFilterDecodedEntities = true;
+
} else {
+
+ //This is not a known entity so we have no way to correct it.
+ //If this happens then we have a problem and the feed probably
+ //still won't parse.
buff.append( "&" );
buff.append( entity );
buff.append( ";" );
+
+ hasFilterFoundUnknownEntity = true;
}
begin = m.end( 0 );
@@ -170,6 +190,12 @@
}
buff.append( content.substring( begin, content.length() ) );
+
+ if ( hasFilterFoundUnknownEntity )
+ log.warn( "Filter encountered unknown entities" );
+
+ if ( hasFilterDecodedEntities )
+ log.warn( "Filter has decoded latin1 entities." );
return buff.toString();
1.12 +8 -3 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/FeedParser.java
Index: FeedParser.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/FeedParser.java,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -r1.11 -r1.12
--- FeedParser.java 20 Jan 2005 09:00:37 -0000 1.11
+++ FeedParser.java 23 Jan 2005 09:49:50 -0000 1.12
@@ -183,10 +183,15 @@
//to parse here. (also I wonder if this could be a chance to do
//autodiscovery).
- //fall back on RDF and RSS
+ //fall back on RDF and RSS parsing.
- RSSFeedParser.parse( listener, doc );
+ //FIXME: if this is an UNKNOWN format we need to throw an
+ //UnsupportedFeedException (which extends FeedParserException)
+ //
+ // In this situation the ROOT elements should be: rss or RDF
+ RSSFeedParser.parse( listener, doc );
+
} catch ( FeedParserException fpe ) {
//if an explicit FeedParserException is thrown just rethrow it..
throw fpe;
1.3 +2 -2 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/MetaFeedParser.java
Index: MetaFeedParser.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/MetaFeedParser.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- MetaFeedParser.java 28 Jun 2004 19:59:47 -0000 1.2
+++ MetaFeedParser.java 23 Jan 2005 09:49:50 -0000 1.3
@@ -27,7 +27,7 @@
import org.jaxen.jdom.*;
/**
- * Handles parsing RSS .
+ * Handles parsing RSS metadata including dates
*
* @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
* @version $Id$
1.5 +7 -1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/NS.java
Index: NS.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/NS.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- NS.java 22 Aug 2004 11:28:04 -0000 1.4
+++ NS.java 23 Jan 2005 09:49:50 -0000 1.5
@@ -72,6 +72,12 @@
public static final Namespace ATOM =
Namespace.getNamespace( "atom", "http://purl.org/atom/ns#" );
+ /**
+ * mod_taxonomy
+ */
+ public static final Namespace TAXO =
+ Namespace.getNamespace( "taxo", "http://purl.org/rss/1.0/modules/taxonomy/" );
+
public static SimpleNamespaceContext context = null;
static {
1.15 +5 -4 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/RSSFeedParser.java
Index: RSSFeedParser.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/RSSFeedParser.java,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -r1.14 -r1.15
--- RSSFeedParser.java 29 Dec 2004 02:18:21 -0000 1.14
+++ RSSFeedParser.java 23 Jan 2005 09:49:50 -0000 1.15
@@ -92,7 +92,7 @@
state.current = item;
doLocale( state, listener, item );
- doParseItem( listener, state );
+ doItem( listener, state );
doLocaleEnd( state, listener, item );
}
@@ -148,8 +148,8 @@
*
* @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
*/
- private static void doParseItem( FeedParserListener listener,
- FeedParserState state ) throws Exception {
+ private static void doItem( FeedParserListener listener,
+ FeedParserState state ) throws Exception {
String resource = null;
@@ -257,6 +257,7 @@
}
MetaFeedParser.parse( listener, state );
+ TagFeedParser.parse( listener, state );
listener.onItemEnd();
1.12 +17 -2 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/impl/DebugFeedParserListener.java
Index: DebugFeedParserListener.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/impl/DebugFeedParserListener.java,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -r1.11 -r1.12
--- DebugFeedParserListener.java 20 Jan 2005 09:00:37 -0000 1.11
+++ DebugFeedParserListener.java 23 Jan 2005 09:49:50 -0000 1.12
@@ -32,7 +32,8 @@
implements FeedParserListener,
FeedDirectoryParserListener,
MetaFeedParserListener,
- LinkFeedParserListener {
+ LinkFeedParserListener,
+ TagFeedParserListener {
PrintStream out = System.out;
@@ -204,6 +205,20 @@
public void onRelationEnd() {
out.println( "onRelationEnd" );
+ }
+
+ // **** TagFeedParserListener ***********************************************
+
+ public void onTag( FeedParserState state,
+ String tag,
+ String tagspace ) throws FeedParserException {
+
+ out.println( "onTag: tag: " + tag + " tagspace: " + tagspace );
+
+ }
+
+ public void onTagEnd() throws FeedParserException {
+ out.println( "onTagEnd" );
}
}
1.19 +1 -0 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java
Index: ProbeLocator.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java,v
retrieving revision 1.18
retrieving revision 1.19
diff -u -r1.18 -r1.19
--- ProbeLocator.java 20 Jan 2005 09:00:37 -0000 1.18
+++ ProbeLocator.java 23 Jan 2005 09:49:50 -0000 1.19
@@ -163,6 +163,7 @@
* @author Brad Neuberg, bkn3@columbia.edu
*/
protected static boolean feedExists(String resource) throws Exception {
+
ResourceRequest request = ResourceRequestFactory.getResourceRequest( resource );
request.setRequestMethod( "HEAD" );
---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org