You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by bu...@apache.org on 2004/08/05 00:17:03 UTC
cvs commit: jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate DiscoveryLocator.java FeedLocator.java FeedReference.java LinkLocator.java ProbeLocator.java
burton 2004/08/04 15:17:02
Modified: feedparser/src/java/org/apache/commons/feedparser/locate
DiscoveryLocator.java FeedLocator.java
FeedReference.java LinkLocator.java
ProbeLocator.java
Log:
Fixed bad bug with HTML link discovery of feeds...
Revision Changes Path
1.10 +5 -9 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/DiscoveryLocator.java
Index: DiscoveryLocator.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/DiscoveryLocator.java,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- DiscoveryLocator.java 28 Jul 2004 23:42:18 -0000 1.9
+++ DiscoveryLocator.java 4 Aug 2004 22:17:01 -0000 1.10
@@ -28,10 +28,6 @@
*/
public class DiscoveryLocator {
- public static final String ATOM_MEDIA_TYPE = "application/atom+xml";
- public static final String RSS_MEDIA_TYPE = "application/rss+xml";
- public static final String XML_MEDIA_TYPE = "text/xml";
-
//NOTE: this will break if the attributes aren't in the right order.
static Pattern pattern =
Pattern.compile( "<link[^>]+type=[\"']([^\"']+)[\"'][^>]+href=[\"']([^\"']+)" );
@@ -40,9 +36,9 @@
static {
- mediatypes.add( ATOM_MEDIA_TYPE );
- mediatypes.add( RSS_MEDIA_TYPE );
- mediatypes.add( XML_MEDIA_TYPE );
+ mediatypes.add( FeedReference.ATOM_MEDIA_TYPE );
+ mediatypes.add( FeedReference.RSS_MEDIA_TYPE );
+ mediatypes.add( FeedReference.XML_MEDIA_TYPE );
}
@@ -78,10 +74,10 @@
list.add( feedReference );
- if ( type.equals( ATOM_MEDIA_TYPE ) )
+ if ( type.equals( FeedReference.ATOM_MEDIA_TYPE ) )
list.setAdAtomFeed( feedReference );
- if ( type.equals( RSS_MEDIA_TYPE ) )
+ if ( type.equals( FeedReference.RSS_MEDIA_TYPE ) )
list.setAdRSSFeed( feedReference );
}
1.8 +9 -2 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java
Index: FeedLocator.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -r1.7 -r1.8
--- FeedLocator.java 28 Jul 2004 23:42:18 -0000 1.7
+++ FeedLocator.java 4 Aug 2004 22:17:01 -0000 1.8
@@ -86,13 +86,20 @@
public static void main( String[] args ) throws Exception {
- String resource = "http://www.corante.com/strange/";
+ //This should find http://www.electoral-vote.com/index.rss
+ String resource = "http://www.electoral-vote.com/";
+
+ //String resource = "http://www.corante.com/strange/";
//String resource = "http://peerfear.org";
List l = locate( resource );
Iterator it = l.iterator();
-
+
+ if ( it.hasNext() == false ) {
+ System.out.println( "NO LINKS FOUND" );
+ }
+
while ( it.hasNext() ) {
FeedReference ref = (FeedReference)it.next();
1.7 +7 -0 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedReference.java
Index: FeedReference.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedReference.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- FeedReference.java 11 May 2004 19:53:18 -0000 1.6
+++ FeedReference.java 4 Aug 2004 22:17:01 -0000 1.7
@@ -25,6 +25,10 @@
*/
public class FeedReference {
+ public static final String ATOM_MEDIA_TYPE = "application/atom+xml";
+ public static final String RSS_MEDIA_TYPE = "application/rss+xml";
+ public static final String XML_MEDIA_TYPE = "text/xml";
+
public static int METHOD_AUTODISCOVERY;
/**
@@ -32,6 +36,9 @@
*/
public String resource = null;
+ /**
+ * The media type of this feed.
+ */
public String type = null;
/**
1.3 +29 -5 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/LinkLocator.java
Index: LinkLocator.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/LinkLocator.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- LinkLocator.java 16 Apr 2004 20:20:01 -0000 1.2
+++ LinkLocator.java 4 Aug 2004 22:17:02 -0000 1.3
@@ -16,11 +16,15 @@
package org.apache.commons.feedparser.locate;
+import org.apache.commons.feedparser.*;
+
import java.io.*;
import java.util.*;
import java.net.*;
/**
+ * Find links by parsing the raw HTML. We only return links that are on the
+ * same site and point to feed files such as /index.rdf and so forth.
*
* @author <a href="mailto:burton@apache.org">Kevin A. Burton</a>
*/
@@ -31,7 +35,9 @@
*
* @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
*/
- public static final List locate( String resource, String content, List list )
+ public static final List locate( String resource,
+ String content,
+ final FeedList list )
throws Exception {
AnchorParserListener listener = new AnchorParserListener() {
@@ -40,8 +46,6 @@
String site = null;
- ArrayList list = new ArrayList();
-
HashSet seen = new HashSet();
public void setContext( Object context ) {
@@ -92,9 +96,29 @@
//FIXME: we should assert that these feeds are from the SAME
//domain, not a link to another feed.
+
+ if ( current.endsWith( ".rss" ) ) {
+
+ //this is an RSS feed.
+ FeedReference ref = new FeedReference( current,
+ FeedReference.RSS_MEDIA_TYPE );
+
+ list.add( ref );
+ list.setAdRSSFeed( ref );
+
+ }
+
+ if ( current.endsWith( ".atom" ) ) {
+
+ FeedReference ref = new FeedReference( current,
+ FeedReference.ATOM_MEDIA_TYPE );
+
+ list.add( ref );
+ list.setAdAtomFeed( ref );
+
+ }
if ( current.endsWith( ".xml" ) ||
- current.endsWith( ".rss" ) ||
current.endsWith( ".rdf" ) ) {
//feed for this blog.
@@ -116,7 +140,7 @@
listener.setContext( resource );
AnchorParser.parseAnchors( content, listener );
- return (List)listener.getResult();
+ return list;
}
1.4 +2 -2 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java
Index: ProbeLocator.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- ProbeLocator.java 14 Jul 2004 03:02:55 -0000 1.3
+++ ProbeLocator.java 4 Aug 2004 22:17:02 -0000 1.4
@@ -42,7 +42,7 @@
//FIXME: Live Journal has both FOAF and Atom... We need support for BOTH
atomProbeMapping.put( "blogspot.com", "/atom.xml" );
atomProbeMapping.put( "livejournal.com", "/data/atom" );
-
+
}
/**
@@ -68,7 +68,7 @@
href += mapping;
FeedReference feedReference = new FeedReference( href,
- DiscoveryLocator.ATOM_MEDIA_TYPE );
+ FeedReference.ATOM_MEDIA_TYPE );
list.add( feedReference );
list.setAdAtomFeed( feedReference );
---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org