You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by bu...@apache.org on 2004/08/05 00:17:03 UTC

cvs commit: jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate DiscoveryLocator.java FeedLocator.java FeedReference.java LinkLocator.java ProbeLocator.java

burton      2004/08/04 15:17:02

  Modified:    feedparser/src/java/org/apache/commons/feedparser/locate
                        DiscoveryLocator.java FeedLocator.java
                        FeedReference.java LinkLocator.java
                        ProbeLocator.java
  Log:
  Fixed bad bug with HTML link discovery of feeds...
  
  Revision  Changes    Path
  1.10      +5 -9      jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/DiscoveryLocator.java
  
  Index: DiscoveryLocator.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/DiscoveryLocator.java,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- DiscoveryLocator.java	28 Jul 2004 23:42:18 -0000	1.9
  +++ DiscoveryLocator.java	4 Aug 2004 22:17:01 -0000	1.10
  @@ -28,10 +28,6 @@
    */
   public class DiscoveryLocator {
   
  -    public static final String ATOM_MEDIA_TYPE = "application/atom+xml";
  -    public static final String RSS_MEDIA_TYPE  = "application/rss+xml";
  -    public static final String XML_MEDIA_TYPE  = "text/xml";
  -    
       //NOTE: this will break if the attributes aren't in the right order.
       static Pattern pattern =
           Pattern.compile( "<link[^>]+type=[\"']([^\"']+)[\"'][^>]+href=[\"']([^\"']+)" );
  @@ -40,9 +36,9 @@
   
       static {
   
  -        mediatypes.add( ATOM_MEDIA_TYPE );
  -        mediatypes.add( RSS_MEDIA_TYPE );
  -        mediatypes.add( XML_MEDIA_TYPE );
  +        mediatypes.add( FeedReference.ATOM_MEDIA_TYPE );
  +        mediatypes.add( FeedReference.RSS_MEDIA_TYPE );
  +        mediatypes.add( FeedReference.XML_MEDIA_TYPE );
           
       }
   
  @@ -78,10 +74,10 @@
                   
                   list.add( feedReference );
   
  -                if ( type.equals( ATOM_MEDIA_TYPE ) )
  +                if ( type.equals( FeedReference.ATOM_MEDIA_TYPE ) )
                       list.setAdAtomFeed( feedReference );
   
  -                if ( type.equals( RSS_MEDIA_TYPE ) )
  +                if ( type.equals( FeedReference.RSS_MEDIA_TYPE ) )
                       list.setAdRSSFeed( feedReference );
   
               }
  
  
  
  1.8       +9 -2      jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java
  
  Index: FeedLocator.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java,v
  retrieving revision 1.7
  retrieving revision 1.8
  diff -u -r1.7 -r1.8
  --- FeedLocator.java	28 Jul 2004 23:42:18 -0000	1.7
  +++ FeedLocator.java	4 Aug 2004 22:17:01 -0000	1.8
  @@ -86,13 +86,20 @@
   
       public static void main( String[] args ) throws Exception {
   
  -        String resource = "http://www.corante.com/strange/";
  +        //This should find http://www.electoral-vote.com/index.rss
  +        String resource = "http://www.electoral-vote.com/";
  +
  +        //String resource = "http://www.corante.com/strange/";
           //String resource = "http://peerfear.org";
   
           List l = locate( resource );
   
           Iterator it = l.iterator();
  -        
  +
  +        if ( it.hasNext() == false ) {
  +            System.out.println( "NO LINKS FOUND" );
  +        } 
  +
           while ( it.hasNext() ) {
   
               FeedReference ref = (FeedReference)it.next();
  
  
  
  1.7       +7 -0      jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedReference.java
  
  Index: FeedReference.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedReference.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- FeedReference.java	11 May 2004 19:53:18 -0000	1.6
  +++ FeedReference.java	4 Aug 2004 22:17:01 -0000	1.7
  @@ -25,6 +25,10 @@
    */
   public class FeedReference {
   
  +    public static final String ATOM_MEDIA_TYPE = "application/atom+xml";
  +    public static final String RSS_MEDIA_TYPE  = "application/rss+xml";
  +    public static final String XML_MEDIA_TYPE  = "text/xml";
  +
       public static int METHOD_AUTODISCOVERY;
       
       /**
  @@ -32,6 +36,9 @@
        */
       public String resource = null;
   
  +    /**
  +     * The media type of this feed.
  +     */
       public String type = null;
   
       /**
  
  
  
  1.3       +29 -5     jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/LinkLocator.java
  
  Index: LinkLocator.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/LinkLocator.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- LinkLocator.java	16 Apr 2004 20:20:01 -0000	1.2
  +++ LinkLocator.java	4 Aug 2004 22:17:02 -0000	1.3
  @@ -16,11 +16,15 @@
   
   package org.apache.commons.feedparser.locate;
   
  +import org.apache.commons.feedparser.*;
  +
   import java.io.*;
   import java.util.*;
   import java.net.*;
   
   /**
  + * Find links by parsing the raw HTML.  We only return links that are on the
  + * same site and point to feed files such as /index.rdf and so forth.
    *
    * @author <a href="mailto:burton@apache.org">Kevin A. Burton</a>
    */
  @@ -31,7 +35,9 @@
        *
        * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
        */
  -    public static final List locate( String resource, String content, List list )
  +    public static final List locate( String resource,
  +                                     String content,
  +                                     final FeedList list )
           throws Exception {
   
           AnchorParserListener listener = new AnchorParserListener() {
  @@ -40,8 +46,6 @@
                   
                   String site = null;
   
  -                ArrayList list = new ArrayList();
  -
                   HashSet seen = new HashSet();
   
                   public void setContext( Object context ) {
  @@ -92,9 +96,29 @@
   
                       //FIXME: we should assert that these feeds are from the SAME
                       //domain, not a link to another feed.
  +
  +                    if ( current.endsWith( ".rss" ) ) {
  +
  +                        //this is an RSS feed.
  +                        FeedReference ref = new FeedReference( current,
  +                                                               FeedReference.RSS_MEDIA_TYPE );
  +
  +                        list.add( ref );
  +                        list.setAdRSSFeed( ref );
  +                        
  +                    }
  +
  +                    if ( current.endsWith( ".atom" ) ) {
  +
  +                        FeedReference ref = new FeedReference( current,
  +                                                               FeedReference.ATOM_MEDIA_TYPE );
  +
  +                        list.add( ref );
  +                        list.setAdAtomFeed( ref );
  +
  +                    }
                       
                       if ( current.endsWith( ".xml" ) ||
  -                         current.endsWith( ".rss" ) ||
                            current.endsWith( ".rdf" ) ) {
   
                           //feed for this blog.
  @@ -116,7 +140,7 @@
           listener.setContext( resource );
           AnchorParser.parseAnchors( content, listener );
           
  -        return (List)listener.getResult();
  +        return list;
           
       }
   
  
  
  
  1.4       +2 -2      jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java
  
  Index: ProbeLocator.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- ProbeLocator.java	14 Jul 2004 03:02:55 -0000	1.3
  +++ ProbeLocator.java	4 Aug 2004 22:17:02 -0000	1.4
  @@ -42,7 +42,7 @@
           //FIXME: Live Journal has both FOAF and Atom...  We need support for BOTH
           atomProbeMapping.put( "blogspot.com", "/atom.xml" );
           atomProbeMapping.put( "livejournal.com", "/data/atom" );
  -        
  +
       }
       
       /**
  @@ -68,7 +68,7 @@
               href += mapping;
               
               FeedReference feedReference = new FeedReference( href,
  -                                                             DiscoveryLocator.ATOM_MEDIA_TYPE );
  +                                                             FeedReference.ATOM_MEDIA_TYPE );
                   
               list.add( feedReference );
               list.setAdAtomFeed( feedReference );
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org