You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by bu...@apache.org on 2005/01/18 20:39:36 UTC

cvs commit: jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate AnchorParser.java DiscoveryLocator.java EntityDecoder.java FeedLocator.java ProbeLocator.java

burton      2005/01/18 11:39:36

  Modified:    feedparser build.xml
               feedparser/src/java/org/apache/commons/feedparser/locate
                        AnchorParser.java DiscoveryLocator.java
                        EntityDecoder.java FeedLocator.java
                        ProbeLocator.java
  Log:
  Fixed a major bug in our anchor parser that would actually cause the page to stop being parsed.
  
  Revision  Changes    Path
  1.11      +4 -0      jakarta-commons-sandbox/feedparser/build.xml
  
  Index: build.xml
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/build.xml,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- build.xml	22 Oct 2004 00:37:08 -0000	1.10
  +++ build.xml	18 Jan 2005 19:39:36 -0000	1.11
  @@ -119,7 +119,9 @@
                classname="org.apache.commons.feedparser.locate.TestFeedLocator"
                fork="true"
                failonerror="true">
  +
               <sysproperty key="feedparser.home" value="${feedparser.home}"/>
  +
          </java>
   
       </target>
  @@ -131,7 +133,9 @@
                classname="org.apache.commons.feedparser.test.TestProbeLocator"
                fork="true"
                failonerror="true">
  +
             <sysproperty key="feedparser.home" value="${feedparser.home}"/>
  +
          </java>
   
       </target>
  
  
  
  1.6       +50 -8     jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/AnchorParser.java
  
  Index: AnchorParser.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/AnchorParser.java,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- AnchorParser.java	29 Dec 2004 02:18:21 -0000	1.5
  +++ AnchorParser.java	18 Jan 2005 19:39:36 -0000	1.6
  @@ -22,7 +22,8 @@
   
   /**
    *
  - * Given HTML pull out an array of anchors
  + * Given a string of HTML content, parse out anchors and fire events with all
  + * the data when they are found.
    *
    * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
    */
  @@ -51,26 +52,67 @@
   
           int index = 0;
   
  -        //FIXME: what if href isn't the first attribute?  It will fail here...
  +        //FIXME: what if href isn't the first attribute?  Will it fail here???
  +
  +        //FIXME: how do we pass back the content of the href: <a href=''> this
  +        //is the content </a> which would pass a string "this is the content"
   
           Matcher m = pattern.matcher( content );
           
  -        LinkedList list = new LinkedList();
  -        
           while ( m.find() ) {
   
  -            //expand this link
  +            HashMap map = DiscoveryLocator.getAttributes( m.group( 0 ) );
   
               String resource = EntityDecoder.decode( m.group( 1 ) );
  -            String title = EntityDecoder.decode( m.group( 2 ).trim() );
  +            //String title = EntityDecoder.decode( m.group( 2 ).trim() );
   
  +            String title = (String)map.get( "title" );
  +
  +            if ( title != null ) {
  +                title = title.trim();
  +                title = EntityDecoder.decode( title );
  +            }
  +                
  +            String rel = (String)map.get( "rel" );
  +            
               if ( resource == null || resource.equals( "" ) )
  -                return;
  +                continue;
               
  -            if ( ! listener.onAnchor( resource, null, title ) )
  +            if ( ! listener.onAnchor( resource, rel, title ) )
                   return;
   
           } 
  +
  +    }
  +
  +    public static void main( String[] args ) throws Exception {
  +
  +        AnchorParserListener listener = new AnchorParserListener() {
  +
  +                public boolean onAnchor( String href, String rel, String title ) {
  +
  +                    System.out.println( "href: " + href );
  +                    System.out.println( "rel: " + rel );
  +                    System.out.println( "title: " + title );
  +                    return true;
  +                }
  +
  +                public Object getResult() {
  +                    return null;
  +                }
  +                public void setContext( Object context ) {}
  +                
  +            };
  +
  +        //FIXME: won't work with single quotes
  +        //FIXME: won't work with <a />
  +        //parse( "<a href=\"http://peerfear.org\" rel=\"linux\" title=\"linux\" >adf</a>", listener );
  +
  +        //parse( "<a rel=\"linux\" href=\"http://peerfear.org\" title=\"linux\" >adf</a>", listener );
  +        //parse( "<a title=\"linux\" rel=\"linux\" href=\"http://peerfear.org\" >adf</a>", listener );
  +
  +        parse( "<a href='http://peerfear.org' rel='linux' title='linux' >adf</a>", listener );
  +
   
       }
   
  
  
  
  1.14      +2 -2      jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/DiscoveryLocator.java
  
  Index: DiscoveryLocator.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/DiscoveryLocator.java,v
  retrieving revision 1.13
  retrieving revision 1.14
  diff -u -r1.13 -r1.14
  --- DiscoveryLocator.java	20 Aug 2004 21:44:06 -0000	1.13
  +++ DiscoveryLocator.java	18 Jan 2005 19:39:36 -0000	1.14
  @@ -134,11 +134,11 @@
        *
        * @author <a href="mailto:burton@rojo.com">Kevin A. Burton</a>
        */
  -    public static HashMap getAttributes( String link ) {
  +    public static HashMap getAttributes( String content ) {
   
           HashMap map = new HashMap();
   
  -        Matcher m = attr_pattern.matcher( link );
  +        Matcher m = attr_pattern.matcher( content );
   
           int index = 0;
   
  
  
  
  1.4       +5 -4      jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/EntityDecoder.java
  
  Index: EntityDecoder.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/EntityDecoder.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- EntityDecoder.java	29 Dec 2004 02:18:21 -0000	1.3
  +++ EntityDecoder.java	18 Jan 2005 19:39:36 -0000	1.4
  @@ -23,9 +23,10 @@
   
   /**
    *
  - * Given a piece of HTML we will decode the entities it contains.  This is a
  - * trivial implementation and we need to go through and make sure all HTML
  - * entities are escaped correctly.
  + * Given a string of HTML content we decode the entities it contains.
  + *
  + * NOTE: Currently this is a trivial implementation and we need to go through
  + * and make sure all HTML entities are correctly supported.
    * 
    * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
    * @version $Id$
  
  
  
  1.25      +0 -2      jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java
  
  Index: FeedLocator.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java,v
  retrieving revision 1.24
  retrieving revision 1.25
  diff -u -r1.24 -r1.25
  --- FeedLocator.java	22 Oct 2004 00:37:08 -0000	1.24
  +++ FeedLocator.java	18 Jan 2005 19:39:36 -0000	1.25
  @@ -104,13 +104,11 @@
           //String resource = "file:///projects/feedparser/tests/locate5.html";
           //String resource = "file:///projects/feedparser/tests/locate6.html";
   
  -        
           //FIXME: add UNIT TESTS for Yahoo Groups and Flickr
           String resource = "http://groups.yahoo.com/group/aggregators/";
   
           //String resource = "http://flickr.com/photos/tags/cats";
   
  -        
           //String resource = "file:///projects/feedparser/tests/locate8.html";
   
           //String resource = "http://blogs.sun.com/roller/page/gonzo";
  
  
  
  1.17      +2 -2      jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java
  
  Index: ProbeLocator.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java,v
  retrieving revision 1.16
  retrieving revision 1.17
  diff -u -r1.16 -r1.17
  --- ProbeLocator.java	22 Oct 2004 00:37:08 -0000	1.16
  +++ ProbeLocator.java	18 Jan 2005 19:39:36 -0000	1.17
  @@ -79,9 +79,9 @@
           // fail-fast if we already have some results and if we determine that
           // we can trust the results (TextAmerica has invalid autodiscovery,
           // for example)
  -        if ( list.size() > 0 && blogService.hasValidAutoDiscovery() )
  +        if ( list.size() > 0 && blogService.hasValidAutodiscovery() )
               return list;
  -        else if ( blogService.hasValidAutoDiscovery() == false ) {
  +        else if ( blogService.hasValidAutodiscovery() == false ) {
               // clear out the list so far since we can't trust the results
               list.clear();
           }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org