You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by bu...@apache.org on 2005/01/23 10:49:50 UTC

cvs commit: jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate ProbeLocator.java

burton      2005/01/23 01:49:50

  Modified:    feedparser/src/java/org/apache/commons/feedparser
                        AtomFeedParser.java FeedFilter.java FeedParser.java
                        MetaFeedParser.java NS.java RSSFeedParser.java
               feedparser/src/java/org/apache/commons/feedparser/impl
                        DebugFeedParserListener.java
               feedparser/src/java/org/apache/commons/feedparser/locate
                        ProbeLocator.java
  Log:
  support for tags
  
  Revision  Changes    Path
  1.17      +2 -1      jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/AtomFeedParser.java
  
  Index: AtomFeedParser.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/AtomFeedParser.java,v
  retrieving revision 1.16
  retrieving revision 1.17
  diff -u -r1.16 -r1.17
  --- AtomFeedParser.java	19 Jan 2005 06:22:15 -0000	1.16
  +++ AtomFeedParser.java	23 Jan 2005 09:49:50 -0000	1.17
  @@ -180,6 +180,7 @@
               doContent( state, listener, child );
   
               MetaFeedParser.parse( listener, state );
  +            TagFeedParser.parse( listener, state );
               
               listener.onItemEnd();
               doLocale( state, listener, child );
  
  
  
  1.7       +27 -1     jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/FeedFilter.java
  
  Index: FeedFilter.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/FeedFilter.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- FeedFilter.java	20 Jan 2005 09:00:37 -0000	1.6
  +++ FeedFilter.java	23 Jan 2005 09:49:50 -0000	1.7
  @@ -21,6 +21,8 @@
   import java.util.*;
   import java.util.regex.*;
   
  +import org.apache.log4j.Logger;
  +
   /**
    *
    * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
  @@ -28,6 +30,8 @@
    */
   public class FeedFilter {
   
  +    private static Logger log = Logger.getLogger( FeedFilter.class );
  +
       public static boolean DO_REMOVE_LEADING_PROLOG = true;
       public static boolean DO_DECODE_ENTITIES = true;
   
  @@ -54,6 +58,8 @@
       public static byte[] parse( String content, String encoding )
           throws Exception {
   
  +        //FIXME: return an object here so that I can flag a bozo bit.
  +        
           //remove leading prolog...
           if ( DO_REMOVE_LEADING_PROLOG )
               content = doRemoveLeadingProlog( content, encoding );
  @@ -87,6 +93,7 @@
   
           if ( begin > 0 ) {
               content = content.substring( begin, content.length() );
  +            log.warn( "Skipped whitespace in prolog and moved towards first element." );
           }
   
           //now skip to the XML processing instruction when necessary.  This is
  @@ -96,6 +103,7 @@
   
           if ( begin > 0 ) {
               content = content.substring( begin, content.length() );
  +            log.warn( "Removed prolog towards first processing instruction." );
           }
   
           content = doRemoveElementProlog( content );
  @@ -147,6 +155,9 @@
   
           int begin = 0;
   
  +        boolean hasFilterDecodedEntities = false;
  +        boolean hasFilterFoundUnknownEntity = false;
  +        
           while ( m.find() ) {
   
               buff.append( content.substring( begin, m.start() ) );
  @@ -159,10 +170,19 @@
                   buff.append( "&#" );
                   buff.append( value );
                   buff.append( ";" );
  +
  +                hasFilterDecodedEntities = true;
  +
               } else {
  +
  +                //This is not a known entity so we have no way to correct it.
  +                //If we get here then we have a problem and the feed probably
  +                //still won't parse.
                   buff.append( "&" );
                   buff.append( entity );
                   buff.append( ";" );
  +
  +                hasFilterFoundUnknownEntity = true;
               }
   
               begin = m.end( 0 );
  @@ -170,6 +190,12 @@
           } 
   
           buff.append( content.substring( begin, content.length() ) );
  +
  +        if ( hasFilterFoundUnknownEntity ) 
  +            log.warn( "Filter encountered unknown entities" );
  +
  +        if ( hasFilterDecodedEntities ) 
  +            log.warn( "Filter has decoded latin1 entities." );
   
           return buff.toString();
           
  
  
  
  1.12      +8 -3      jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/FeedParser.java
  
  Index: FeedParser.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/FeedParser.java,v
  retrieving revision 1.11
  retrieving revision 1.12
  diff -u -r1.11 -r1.12
  --- FeedParser.java	20 Jan 2005 09:00:37 -0000	1.11
  +++ FeedParser.java	23 Jan 2005 09:49:50 -0000	1.12
  @@ -183,10 +183,15 @@
               //to parse here.  (also I wonder if this could be a chance to do
               //autodiscovery).
               
  -            //fall back on RDF and RSS
  +            //fall back on RDF and RSS parsing.
   
  -            RSSFeedParser.parse( listener, doc );
  +            //FIXME: if this is an UNKNOWN format we need to throw an
  +            //UnsupportedFeedException (which extends FeedParserException)
  +            //
  +            // In this situation the ROOT elements should be: rss or RDF
               
  +            RSSFeedParser.parse( listener, doc );
  +
           } catch ( FeedParserException fpe ) {
               //if an explicit FeedParserException is thrown just rethrow it..
               throw fpe;
  
  
  
  1.3       +2 -2      jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/MetaFeedParser.java
  
  Index: MetaFeedParser.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/MetaFeedParser.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- MetaFeedParser.java	28 Jun 2004 19:59:47 -0000	1.2
  +++ MetaFeedParser.java	23 Jan 2005 09:49:50 -0000	1.3
  @@ -27,7 +27,7 @@
   import org.jaxen.jdom.*;
   
   /**
  - * Handles parsing RSS .
  + * Handles parsing RSS metadata, including dates.
    *
    * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
    * @version $Id$
  
  
  
  1.5       +7 -1      jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/NS.java
  
  Index: NS.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/NS.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- NS.java	22 Aug 2004 11:28:04 -0000	1.4
  +++ NS.java	23 Jan 2005 09:49:50 -0000	1.5
  @@ -72,6 +72,12 @@
       public static final Namespace ATOM =
           Namespace.getNamespace( "atom", "http://purl.org/atom/ns#" );
   
  +    /**
  +     * mod_taxonomy
  +     */
  +    public static final Namespace TAXO =
  +        Namespace.getNamespace( "taxo", "http://purl.org/rss/1.0/modules/taxonomy/" );
  +
       public static SimpleNamespaceContext context = null;
   
       static {
  
  
  
  1.15      +5 -4      jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/RSSFeedParser.java
  
  Index: RSSFeedParser.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/RSSFeedParser.java,v
  retrieving revision 1.14
  retrieving revision 1.15
  diff -u -r1.14 -r1.15
  --- RSSFeedParser.java	29 Dec 2004 02:18:21 -0000	1.14
  +++ RSSFeedParser.java	23 Jan 2005 09:49:50 -0000	1.15
  @@ -92,7 +92,7 @@
               state.current = item;
                   
               doLocale( state, listener, item );
  -            doParseItem( listener, state );
  +            doItem( listener, state );
               doLocaleEnd( state, listener, item );
   
           }
  @@ -148,8 +148,8 @@
        *
        * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
        */
  -    private static void doParseItem( FeedParserListener listener,
  -                                     FeedParserState state ) throws Exception {
  +    private static void doItem( FeedParserListener listener,
  +                                FeedParserState state ) throws Exception {
   
           String resource = null;
   
  @@ -257,6 +257,7 @@
           }
   
           MetaFeedParser.parse( listener, state );
  +        TagFeedParser.parse( listener, state );
           
           listener.onItemEnd();
           
  
  
  
  1.12      +17 -2     jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/impl/DebugFeedParserListener.java
  
  Index: DebugFeedParserListener.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/impl/DebugFeedParserListener.java,v
  retrieving revision 1.11
  retrieving revision 1.12
  diff -u -r1.11 -r1.12
  --- DebugFeedParserListener.java	20 Jan 2005 09:00:37 -0000	1.11
  +++ DebugFeedParserListener.java	23 Jan 2005 09:49:50 -0000	1.12
  @@ -32,7 +32,8 @@
       implements FeedParserListener,
                  FeedDirectoryParserListener,
                  MetaFeedParserListener,
  -               LinkFeedParserListener {
  +               LinkFeedParserListener,
  +               TagFeedParserListener {
   
       PrintStream out = System.out;
   
  @@ -204,6 +205,20 @@
   
       public void onRelationEnd() {
           out.println( "onRelationEnd" );
  +    }
  +
  +    // **** TagFeedParserListener ***********************************************
  +
  +    public void onTag( FeedParserState state,
  +                       String tag,
  +                       String tagspace ) throws FeedParserException {
  +
  +        out.println( "onTag: tag: " + tag + " tagspace: " + tagspace );
  +
  +    }
  +
  +    public void onTagEnd() throws FeedParserException {
  +        out.println( "onTagEnd" );
       }
   
   } 
  
  
  
  1.19      +1 -0      jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java
  
  Index: ProbeLocator.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java,v
  retrieving revision 1.18
  retrieving revision 1.19
  diff -u -r1.18 -r1.19
  --- ProbeLocator.java	20 Jan 2005 09:00:37 -0000	1.18
  +++ ProbeLocator.java	23 Jan 2005 09:49:50 -0000	1.19
  @@ -163,6 +163,7 @@
        *  @author Brad Neuberg, bkn3@columbia.edu
        */
       protected static boolean feedExists(String resource) throws Exception {
  +
           ResourceRequest request = ResourceRequestFactory.getResourceRequest( resource );
   
           request.setRequestMethod( "HEAD" );
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org