You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by bu...@apache.org on 2004/02/18 21:06:28 UTC
cvs commit: jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test TestAtom.java TestAtom.xml TestFeedParser.java TestFeedParserUTF8.java TestFeedParserUTF8.rss

burton      2004/02/18 12:06:28

  Added:       feedparser/src/java/org/apache/commons/feedparser/impl
                        DebugFeedParserListener.java
               feedparser/src/java/org/apache/commons/feedparser/locate
                        DiscoveryLocator.java FeedLocator.java
                        FeedReference.java package.html
               feedparser/src/java/org/apache/commons/feedparser/output
                        RSS10_OutputFeedParserListener.java
               feedparser/src/java/org/apache/commons/feedparser/test
                        TestAtom.java TestAtom.xml TestFeedParser.java
                        TestFeedParserUTF8.java TestFeedParserUTF8.rss
  Log:
  init of all source code from feedparser
  
  Revision  Changes    Path
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/impl/DebugFeedParserListener.java
  
  Index: DebugFeedParserListener.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution, if
   *    any, must include the following acknowlegement:  
   *       "This product includes software developed by the 
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowlegement may appear in the software itself,
   *    if and wherever such third-party acknowlegements normally appear.
   *
   * 4. The names "The Jakarta Project", "FeedParser", and "Apache Software
   *    Foundation" must not be used to endorse or promote products derived
   *    from this software without prior written permission. For written 
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache"
   *    nor may "Apache" appear in their names without prior written
   *    permission of the Apache Group.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   *
   * ====================================================================
   */
  
  package org.apache.commons.feedparser.impl;
  
  import org.apache.commons.feedparser.*;
  import org.jdom.*;
  
  /**
   * Listener that writes a trace line to <code>System.out</code> for every
   * parser callback it receives.  It does nothing else with the events.
   *
   * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
   * @version $Id: DebugFeedParserListener.java,v 1.1 2004/02/18 20:06:27 burton Exp $
   */
  public class DebugFeedParserListener extends DefaultFeedParserListener
      implements FeedParserListener {

      /**
       * Called prior to event parsing to signal the parsing of a new feed;
       * prints a marker line.
       */
      public void init() {
          printDebugLine( "debug: init()" );
      }

      /**
       * Accepts a context object and ignores it.
       *
       * @param context unused by this listener
       */
      public void setContext( Object context ) {
      }

      /**
       * Called when a channel is found; prints a marker line.  None of the
       * arguments are printed.
       */
      public void onChannel( FeedParserState state,
                             String title,
                             String link,
                             String description ) {
          printDebugLine( "debug: onChannel" );
      }

      /** Prints a marker line for the end of a channel. */
      public void onChannelEnd() {
          printDebugLine( "debug: onChannelEnd" );
      }

      /**
       * Called when an RSS image is found; prints a marker line.  None of the
       * arguments are printed.
       */
      public void onImage( FeedParserState state,
                           String title,
                           String link,
                           String url ) {
          printDebugLine( "debug: onImage" );
      }

      /** Prints a marker line for the end of an image. */
      public void onImageEnd() {
          printDebugLine( "debug: onImageEnd" );
      }

      /**
       * Called when an RSS item or Atom entry is found.  Prints a marker line
       * followed by the title, link and description, one per line; the
       * permalink is not printed.
       */
      public void onItem( FeedParserState state,
                          String title,
                          String link,
                          String description,
                          String permalink ) {
          printDebugLine( "debug: onItem" );
          printDebugLine( "\ttitle : " + title );
          printDebugLine( "\tlink : " + link );
          printDebugLine( "\tdescription : " + description );
      }

      /** Prints a marker line for the end of an item. */
      public void onItemEnd() {
          printDebugLine( "debug: onItemEnd" );
      }

      /** Called when the feed has finished parsing; prints a marker line. */
      public void finished() {
          printDebugLine( "debug: finished" );
      }

      /** Single exit point for all trace output of this listener. */
      private static void printDebugLine( String line ) {
          System.out.println( line );
      }

  }
  
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/DiscoveryLocator.java
  
  Index: DiscoveryLocator.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution, if
   *    any, must include the following acknowlegement:  
   *       "This product includes software developed by the 
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowlegement may appear in the software itself,
   *    if and wherever such third-party acknowlegements normally appear.
   *
   * 4. The names "The Jakarta Project", "FeedParser", and "Apache Software
   *    Foundation" must not be used to endorse or promote products derived
   *    from this software without prior written permission. For written 
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache"
   *    nor may "Apache" appear in their names without prior written
   *    permission of the Apache Group.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   *
   * ====================================================================
   */
  
  package org.apache.commons.feedparser.locate;
  
  import java.io.*;
  import java.util.*;
  
  import org.apache.regexp.*;
  
  /**
   * Finds feeds that an (X)HTML page advertises through
   * <code>&lt;link type="..." href="..."&gt;</code> elements.
   *
   * <p>Matching is done with a regular expression over the raw page text, so
   * it has known limits: only the first link of each media type is reported,
   * the <code>type</code> attribute must appear before <code>href</code>
   * inside the tag, and the whole document is scanned rather than just its
   * head section.</p>
   *
   * @author <a href="mailto:burton@apache.org">Kevin A. Burton</a>
   */
  public class DiscoveryLocator {

      /**
       * Media types probed for, in this order.  Column 0 is the literal media
       * type recorded on the resulting {@link FeedReference}; column 1 is the
       * same value escaped for embedding in a regular expression.
       */
      private static final String[][] MEDIA_TYPES = {
          { "application/atom+xml", "application/atom\\+xml" },
          { "application/rss+xml", "application/rss\\+xml" },
          { "text/xml", "text/xml" },
      };

      /**
       * Appends one {@link FeedReference} to <code>list</code> for each
       * supported media type that has a matching link in <code>content</code>.
       *
       * @param resource address the content came from; not used by this method
       * @param content text of the page to scan
       * @param list receives the discovered references
       * @return the same <code>list</code> instance that was passed in
       * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
       */
      public static final List locate( String resource, String content, List list )
          throws Exception {

          //this mechanism is easier but it isn't efficient.  It should parse
          //elements forward until it discovers </head>.  Also note that this
          //reports only the first feed URL found per media type.

          for ( int i = 0; i < MEDIA_TYPES.length; ++i ) {
              doDiscovery( MEDIA_TYPES[i][0], MEDIA_TYPES[i][1], content, list );
          }

          return list;

      }

      /**
       * Looks for the first link of one media type and records it if present.
       *
       * @param type literal media type stored on the reference
       * @param typePattern regex-escaped form of <code>type</code> used for matching
       */
      private static void doDiscovery( String type,
                                       String typePattern,
                                       String content,
                                       List list ) {

          String href = getLinkType( typePattern, content );

          //store the real media type; the escaped pattern would otherwise leak
          //a backslash into FeedReference.type
          if ( href != null )
              list.add( new FeedReference( href, type ) );

      }

      /**
       * Get the href of the first link with the given type.
       *
       * @param type media type, already escaped for use in a regular expression
       * @param content text of the page to scan
       * @return the captured href value, or <code>null</code> when nothing matches
       * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
       */
      private static String getLinkType( String type, String content ) {

          RE regexp = new RE( "<link[^>]+type=[\"']" + type + "[\"'][^>]+href=[\"']([^\"']+)" );

          if ( regexp.match( content ) )
              return regexp.getParen( 1 );

          return null;

      }

  }
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java
  
  Index: FeedLocator.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution, if
   *    any, must include the following acknowlegement:  
   *       "This product includes software developed by the 
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowlegement may appear in the software itself,
   *    if and wherever such third-party acknowlegements normally appear.
   *
   * 4. The names "The Jakarta Project", "FeedParser", and "Apache Software
   *    Foundation" must not be used to endorse or promote products derived
   *    from this software without prior written permission. For written 
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache"
   *    nor may "Apache" appear in their names without prior written
   *    permission of the Apache Group.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   *
   * ====================================================================
   */
  
  package org.apache.commons.feedparser.locate;
  
  import org.peerfear.newsmonster.network.*;
  
  import java.io.*;
  import java.util.*;
  
  /**
   * Entry point for feed discovery: fetches a page and hands its content to
   * {@link DiscoveryLocator}.
   *
   * @author <a href="mailto:burton@apache.org">Kevin A. Burton</a>
   */
  public class FeedLocator {

      /** Page probed by {@link #main} when no argument is supplied. */
      private static final String DEFAULT_RESOURCE = "http://diveintomark.org";

      /**
       * Locate all feeds within the given resource.  The resource should be a
       * link to an (X)HTML document, usually a weblog or a website.
       *
       * Example: http://peerfear.org
       *
       * @param resource address of the page to fetch and scan
       * @return list of {@link FeedReference} objects, possibly empty
       * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
       */
      public static final List locate( String resource ) throws Exception {

          //Use my network library when it's migrated into Apache.

          //fetch content
          ResourceRequest request = ResourceRequestFactory.getResourceRequest( resource );

          String content = request.getInputStreamAsString();

          //return resources
          return locate( resource, content );

      }

      /**
       * Locate the feeds advertised within content that was already fetched.
       *
       * @param resource address the content came from
       * @param content text of the page to scan
       * @return a new list of {@link FeedReference} objects, possibly empty
       * @author <a href="mailto:burton@peerfear.org">Kevin Burton</a>
       */
      public static final List locate( String resource, String content ) throws Exception {

          LinkedList list = new LinkedList();

          DiscoveryLocator.locate( resource, content, list );

          return list;

      }

      /**
       * Prints the address of every feed discovered on a page.
       *
       * @param args optional; <code>args[0]</code> is the page to probe.  When
       *             absent the built-in default page is used.
       */
      public static void main( String[] args ) throws Exception {

          String resource = DEFAULT_RESOURCE;

          if ( args != null && args.length > 0 )
              resource = args[0];

          List found = locate( resource );

          for ( Iterator it = found.iterator(); it.hasNext(); ) {

              FeedReference ref = (FeedReference)it.next();

              System.out.println( ref.resource );

          }

      }

  }
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedReference.java
  
  Index: FeedReference.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution, if
   *    any, must include the following acknowlegement:  
   *       "This product includes software developed by the 
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowlegement may appear in the software itself,
   *    if and wherever such third-party acknowlegements normally appear.
   *
   * 4. The names "The Jakarta Project", "FeedParser", and "Apache Software
   *    Foundation" must not be used to endorse or promote products derived
   *    from this software without prior written permission. For written 
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache"
   *    nor may "Apache" appear in their names without prior written
   *    permission of the Apache Group.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   *
   * ====================================================================
   */
  
  package org.apache.commons.feedparser.locate;
  
  import org.peerfear.newsmonster.network.*;
  
  import java.io.*;
  import java.util.*;
  
  /**
   * Plain holder pairing the address of a discovered feed with the type
   * string it was discovered under.  Both fields are public and mutable.
   *
   * @author <a href="mailto:burton@apache.org">Kevin A. Burton</a>
   */
  public class FeedReference {

      /** Address of the feed as supplied to the constructor; may be null. */
      public String resource;

      /** Type string supplied to the constructor; may be null. */
      public String type;

      /**
       * Creates a reference holding the two values exactly as given.
       *
       * @param resource address of the feed
       * @param type type string associated with the feed
       */
      public FeedReference( String resource, String type ) {
          this.type = type;
          this.resource = resource;
      }

  }
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/package.html
  
  Index: package.html
  ===================================================================
  <body>
  
      <p>
          A feed AutoDiscovery parser which supports both RSS and Atom.
      </p>
  
  </body>
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/output/RSS10_OutputFeedParserListener.java
  
  Index: RSS10_OutputFeedParserListener.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution, if
   *    any, must include the following acknowlegement:  
   *       "This product includes software developed by the 
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowlegement may appear in the software itself,
   *    if and wherever such third-party acknowlegements normally appear.
   *
   * 4. The names "The Jakarta Project", "FeedParser", and "Apache Software
   *    Foundation" must not be used to endorse or promote products derived
   *    from this software without prior written permission. For written 
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache"
   *    nor may "Apache" appear in their names without prior written
   *    permission of the Apache Group.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   *
   * ====================================================================
   */
  
  package org.apache.commons.feedparser.output;
  
  import org.apache.commons.feedparser.*;
  
  import org.jdom.*;
  
  import java.io.*;
  
  /**
   * Listener constructed around an output stream.  In this revision every
   * callback is an empty stub and the stream is stored but never written to.
   *
   * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
   * @version $Id: RSS10_OutputFeedParserListener.java,v 1.1 2004/02/18 20:06:27 burton Exp $
   */
  public class RSS10_OutputFeedParserListener extends DefaultFeedParserListener
      implements FeedParserListener {

      /** Stream handed to the constructor; not read or written by this class yet. */
      private final OutputStream out;

      /**
       * Create a new <code>RSS10_OutputFeedParserListener</code> instance.
       *
       * @param out stream to remember for later use
       */
      public RSS10_OutputFeedParserListener( OutputStream out ) {
          this.out = out;
      }

      /** Called prior to event parsing to signal a new feed; currently a no-op. */
      public void init() {
      }

      /** Accepts a context object and ignores it. */
      public void setContext( Object context ) {
      }

      /** Called when a channel is found; currently a no-op. */
      public void onChannel( FeedParserState state,
                             String title,
                             String link,
                             String description ) {
      }

      /** Called at the end of a channel; currently a no-op. */
      public void onChannelEnd() {
      }

      /** Called when an RSS image is found; currently a no-op. */
      public void onImage( FeedParserState state,
                           String title,
                           String link,
                           String url ) {
      }

      /** Called at the end of an image; currently a no-op. */
      public void onImageEnd() {
      }

      /** Called when an RSS item or Atom entry is found; currently a no-op. */
      public void onItem( FeedParserState state,
                          String title,
                          String link,
                          String description,
                          String permalink ) {
      }

      /** Called at the end of an item; currently a no-op. */
      public void onItemEnd() {
      }

      /** Called when the feed has finished parsing; currently a no-op. */
      public void finished() {
      }

  }
  
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestAtom.java
  
  Index: TestAtom.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution, if
   *    any, must include the following acknowlegement:  
   *       "This product includes software developed by the 
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowlegement may appear in the software itself,
   *    if and wherever such third-party acknowlegements normally appear.
   *
   * 4. The names "The Jakarta Project", "FeedParser", and "Apache Software
   *    Foundation" must not be used to endorse or promote products derived
   *    from this software without prior written permission. For written 
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache"
   *    nor may "Apache" appear in their names without prior written
   *    permission of the Apache Group.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   *
   * ====================================================================
   */
  
  package org.apache.commons.feedparser.test;
  
  import java.applet.*;
  import java.util.*;
  import java.io.*;
  import java.net.*;
  import java.security.*;
  
  import org.apache.lucene.index.*;
  import org.apache.lucene.search.*;
  import org.apache.lucene.queryParser.*;
  import org.apache.lucene.analysis.*;
  import org.apache.lucene.analysis.standard.*;
  import org.apache.lucene.document.*;
  
  import org.peerfear.newsmonster.tools.*;
  import org.peerfear.newsmonster.network.*;
  
  import junit.framework.*;
  
  import org.apache.commons.feedparser.*;
  import org.apache.commons.feedparser.impl.*;
  
  /**
   * Runs the feed parser over the Atom sample document and prints selected
   * values from each item to <code>System.out</code>.  The test makes no
   * assertions; it passes as long as parsing does not throw.
   *
   * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
   * @version $Id: TestAtom.java,v 1.1 2004/02/18 20:06:27 burton Exp $
   */
  public class TestAtom extends TestCase {

      public TestAtom( String name ) throws Exception {
          super( name );
      }

      /**
       * Parses the sample feed with a listener that prints item titles,
       * permalinks and subjects.
       */
      public void test1() throws Exception {

          // NOTE(review): absolute path under org/peerfear, while this class
          // lives in org.apache.commons.feedparser.test -- confirm the fixture
          // location is still valid.
          String resource = "file:///projects/feedparser/src/java/org/peerfear/feedparser/test/TestAtom.xml";

          FeedParserListener listener = newPrintingListener();
          listener.setContext( this );

          FeedParser parser = FeedParserFactory.newFeedParser();
          ResourceRequest request = ResourceRequestFactory.getResourceRequest( resource );

          parser.parse( listener, request.getInputStream() );

      }

      /**
       * Builds the listener used by {@link #test1}: it prints the title and
       * permalink of every item and the content of every subject, and ignores
       * item-end and finished events.
       */
      private FeedParserListener newPrintingListener() {

          return new DefaultFeedParserListener() {

              public void onItem( FeedParserState state,
                                  String title,
                                  String weblog,
                                  String description,
                                  String permalink ) throws FeedParserException {

                  System.out.println( "title: " + title );
                  System.out.println( "permalink: " + permalink );

              }

              public void onItemEnd() {
              }

              public void finished() {
              }

              // **** MetaFeedParserListener ************************************

              public void onSubject( FeedParserState state, String content )
                  throws FeedParserException {

                  System.out.println( "subject: " + content );

              }

          };

      }

      /** Runs {@link #test1} directly, without a JUnit runner. */
      public static void main( String[] args ) throws Exception {
          new TestAtom( null ).test1();
      }

  }
  
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestAtom.xml
  
  Index: TestAtom.xml
  ===================================================================
  <?xml version="1.0" encoding="iso-8859-1"?>
  <feed version="0.3"
      xmlns="http://purl.org/atom/ns#"
      xmlns:dc="http://purl.org/dc/elements/1.1/"
      xml:lang="en">
      <title>dive into mark</title>
      <link rel="alternate" type="text/html" href="http://diveintomark.org/"/>
      <link rel="next" type="application/atom+xml" href="http://diveintomark.org/xml/2004/01/index.atom"/>
      <modified>2004-01-14T07:21:44Z</modified>
      <author>
      <name>Mark Pilgrim</name>
      <url>http://diveintomark.org/</url>
      <email>f8dy@diveintomark.org</email>
      </author>
  
    <tagline>Quando dio vuole castigarci ci manda quello che desideriamo.</tagline>
    <id>tag:diveintomark.org,2004:3</id>
    <generator url="http://www.movabletype.org/" version="2.66">Movable Type</generator>
    <copyright>Copyright (c) 2004, Mark Pilgrim</copyright>
    <entry>
      <title>Thought experiment</title>
      <link rel="alternate" type="text/html" href="http://diveintomark.org/archives/2004/01/14/thought_experiment"/>
      <modified>2004-01-15T20:02:31Z</modified>
      <issued>2004-01-14T02:21:44-05:00</issued>
      <id>tag:diveintomark.org,2004:3.3152</id>
      <created>2004-01-14T07:21:44Z</created>
      <summary type="text/plain">The client is the wrong place to enforce data integrity.  It&apos;s just the wrong place.  If you want to do it, of course I can&apos;t stop you.  But think about who it will hurt.</summary>
      <dc:subject>Web services</dc:subject>
      <content type="text/html" mode="escaped" xml:lang="en" xml:base="http://diveintomark.org/archives/2004/01/14/thought_experiment">
        <![CDATA[<p><a href="http://norman.walsh.name/2004/01/12/postel">Norman Walsh</a> (<a href="http://validator.w3.org/check?uri=http%3A%2F%2Fnorman.walsh.name%2F2004%2F01%2F12%2Fpostel">invalid XML</a>), <a href="http://dannyayers.com/archives/002169.html">Danny Ayers</a> (<a href="http://validator.w3.org/check?uri=http%3A%2F%2Fdannyayers.com%2Farchives%2F002169.html">invalid XML</a>), <a href="http://inessential.com/?comments=1&amp;postid=2770">Brent Simmons</a> (<a href="http://validator.w3.org/check?uri=http%3A%2F%2Finessential.com%2F%3Fcomments%3D1%26postid%3D2770">invalid XML</a>), <a href="http://nick.typepad.com/blog/2004/01/feeddemon_and_w.html">Nick Bradbury</a> (<a href="http://validator.w3.org/check?uri=http%3A%2F%2Fnick.typepad.com%2Fblog%2F2004%2F01%2Ffeeddemon_and_w.html">invalid XML</a>), and <a href="http://bitworking.org/news/There_are_no_exceptions_to_Postel_s_Law_">Joe Gregorio</a> (<a href="http://validator.w3.org/check?uri=http%3A%2F%2Fbitworking.org%2Fnews%2FThere_are_no_exceptions_to_Postel_s_Law_">invalid XML claiming to be HTML</a>) have all denounced me as a heretic for pointing out that, perhaps, rejecting invalid XML on the client side is a bad idea.  The reason I know that they have denounced me is that I read what they had to say, and the reason I was able to read what they had to say is that my browser is very forgiving of all their various XML wellformedness and validity errors.</p>
  
  <p>Tim Bray has chimed in by <a href="http://www.tbray.org/ongoing/When/200x/2004/01/11/PostelPilgrim">calling all of those people names</a>, stating in no uncertain terms that <q cite="http://www.tbray.org/ongoing/When/200x/2004/01/11/PostelPilgrim">anyone who can't make ... well-formed XML is an incompetent fool.</q>  Well, technically he was only talking about syndication feeds, but since XHTML is as much XML as Atom or RSS, I'm pretty sure he would apply the same measurement.  So if you can't make well-formed XML, don't despair; you may be a fool, but you are, if nothing else, in outstanding company.</p>
  
  <p>Rather than call people names, I'd like to propose a thought experiment.</p>
  
  <p>Imagine, if you will, that all web browsers use strict XML parsers.  That is, whenever they encounter an XHTML page, they parse it with a conforming XML parser and refuse to display the page unless it is well-formed XML.  This part of the thought experiment is not terribly difficult to imagine, since Mozilla actually works this way under certain circumstances (if the server sends an XHTML page with the MIME type <code>application/xhtml+xml</code>, instead of the normal <code>text/html</code>).  But imagine that all browsers worked this way, regardless of MIME type.</p>
  
  <p>Now imagine that you were using a publishing tool that prided itself on its standards compliance.  All of its default templates were valid XHTML.  It incorporated a nifty layout editor to ensure that you couldn't introduce any invalid XHTML into the templates yourself.  It incorporated a nifty validating editor to ensure that you couldn't introduce any invalid XHTML into your authored content.  It was all very nifty.</p>
  
  <p>Imagine that you posted a long rant about how this is the way the world should work, that clients should be the gatekeepers of wellformedness, and strictly reject any invalid XML that comes their way.  You click 'Publish', you double-check that your page validates, and you merrily close your laptop and get on with your life.</p>
  
  <p>A few hours later, you start getting email from your readers that your site is broken.  Some of them are nice enough to include a URL, others simply scream at you incoherently and tell you that you suck.  (This part of the thought experiment should not be terribly difficult to imagine either, for anyone who has ever dealt with end-user bug reports.)  You test the page, and lo and behold, they are correct: the page that you so happily and validly authored is now not well-formed, and it not showing up at all in any browser.  You try validating the page with a third-party validator service, only to discover that it gives you an error message you've never seen before and that you don't understand.</p>
  
  <p>You pore through the raw source code of the page and find what you think is the problem, but it's not in your content.  In fact, it's in an auto-generated part of the page that you have no control over.  What happened was, someone linked to you, and when they linked to you they sent a trackback with some illegal characters (illegal for you, not for them, since they declare a different character set than you do).  But your publishing tool had a bug, and it automatically inserted their illegal characters into your carefully and validly authored page, and now all hell has broken loose.</p>
  
  <p>The emails are really pouring in now.  You desperately jump to your administration page to delete the offending trackback, but oh no!  The administration page itself tries to display the trackbacks you've received, and you get an XML processing error.  The same bug that was preventing your readers from reading your published page is now preventing you from fixing it!  You're caught in a catch-22.  And what's worse, your site is part of a completely hosted solution, so you can't even dig into the source or the underlying database and fix it yourself; all the code is locked away on someone else's server, beyond your control.  There's nothing you can do now but fire off a desperate email to your hosting provider and hope they can fix the underlying problem <em>and</em> clean up your bad data.  You know, whenever they get around to it.</p>
  
  <p>All the while, your page is completely inaccessible and visibly broken, and readers are emailing you telling you this over and over again.  And of course the discussion you were trying to start with your eloquent words has come to a screeching halt; no new comments can be added because your comment form is on the same broken page.</p>
  
  <p>Here's the thing: that wasn't a thought experiment; it all really happened.  It's a funny story, actually, because it happened to Nick Bradbury, on the very page where he was <a href="http://nick.typepad.com/blog/2004/01/feeddemon_and_w.html">explaining why it was so important for clients to reject non-wellformed XML</a>.  His original post was valid XHTML, and his surrounding page was valid XHTML, but a trackback came in with a character that wasn't in his character set, and Typepad didn't catch it, and suddenly his page became non-wellformed XML.</p>
  
  <p>Except that none of the rest of it happened, because Nick is not publishing his page as <code>application/xhtml+xml</code>, and browsers are forgiving by default.  And although I did happen to notice the XML wellformedness error and report it, it wasn't a critical problem for him.  I was even able to report the error using the comment form on the broken page itself.  And while SixApart should certainly fix the error, they are free to fix it on their own schedule; it is not a crisis for them or for Nick, no one is being flooded with angry emails, no one is caught in a catch-22 of invalidity.</p>
  
  <p>Want another thought experiment?  Imagine Nick was serving Google ads on that page.  Not so funny anymore, is it?</p>
  
  <p>The client is the wrong place to enforce data integrity.  It's just the wrong place.  I hear otherwise intelligent people claim that <q>if everyone did it, it would be okay.</q>  No, if everyone did it, <em>it would be even worse</em>.  If you want to do it, of course I can't stop you.  But think about who it will hurt.</p>]]>
        
      </content>
    </entry>
    <entry>
      <title>Transitions</title>
      <link rel="alternate" type="text/html" href="http://diveintomark.org/archives/2004/01/12/transitions"/>
      <modified>2004-01-12T17:07:36Z</modified>
      <issued>2004-01-12T12:07:03-05:00</issued>
      <id>tag:diveintomark.org,2004:3.3149</id>
      <created>2004-01-12T17:07:03Z</created>
      <summary type="text/plain">I would like to reiterate how detailed and well-thought-out a plan it was, in a feeble attempt to excuse how magnificently I bungled it.</summary>
      <dc:subject></dc:subject>
      <content type="text/html" mode="escaped" xml:lang="en" xml:base="http://diveintomark.org/archives/2004/01/12/transitions">
        <![CDATA[<p>I am looking for maintainers for the following projects (contact <a href="mailto:mark@diveintomark.org">mark@diveintomark.org</a>):</p>
  
  <ul>
  <li><a href="http://diveintomark.org/projects/feed_finder/">Ultraliberal feed finder</a>.  No known bugs.</li>
  <li><a href="http://diveintomark.org/projects/feed_parser/">Ultraliberal feed parser</a>.  Several pending patches.</li>
  <li><a href="http://diveintomark.org/projects/pyamazon/">PyAmazon</a>.  No known bugs.</li>
  <li><a href="http://diveintomark.org/projects/pyblogger/">PyBlogger</a>.  Currently broken (Blogger changed their API endpoint, and the old endpoint just stopped redirecting recently).</li>
  <li><a href="http://diveintomark.org/projects/pygoogle/">PyGoogle</a>.  One pending patch to work with current SOAP libraries.</li>
  <li><a href="http://diveintomark.org/projects/pymanila/">PyManila</a>.  One pending patch.</li>
  <li><a href="http://diveintomark.org/projects/pytechnorati/">PyTechnorati</a>.  Unsure of current status; has the API changed?</li>
  <li><a href="http://diveintomark.org/projects/pytextile/">PyTextile</a>.  Several pending patches for Textile 1 support, needs to be overhauled to support Textile 2.</li>
  <li><a href="http://diveintomark.org/projects/xfmllib/">xfmllib</a>.  No known bugs.</li>
  </ul>
  
  <p>The following projects are end-of-lifed:</p>
  
  <ul>
  <li><a href="http://diveintomark.org/projects/pysitestats/">PySiteStats</a>.  A quick hack that I no longer use.</li>
  <li><a href="http://diveintomark.org/projects/recommended_reading/">Recommended Reading</a>.  A proof-of-concept; far superior independent implementations are now built into systems like <a href="http://www.bloglines.com/">Bloglines</a>.</li>
  </ul>
  
  <p>My photo galleries are offline until further notice.  They were increasingly expensive to maintain, finally generating over 1 GB of traffic per day in normal interactive use (never mind the occasional robotic abuse).  Now that I have my own server and a new hosting plan, I have more bandwidth available at a lower price, but I don't have that much.</p>
  
  <p>I also had a great plan for transitioning a selection of my older posts to a new site.  I would like to reiterate how detailed and well-thought-out a plan it was, in a feeble attempt to excuse how magnificently I bungled it.  There are now scattered 404s among my archives, but rest assured they will turn into 301s soon enough.  Until then, individual reports of broken links are not especially helpful, since I will be writing scripts to bulk-redirect them all en-masse.</p>]]>
        
      </content>
    </entry>
    <entry>
      <title>There are no exceptions to Postel&apos;s Law</title>
      <link rel="alternate" type="text/html" href="http://diveintomark.org/archives/2004/01/08/postels-law"/>
      <modified>2004-01-11T06:16:01Z</modified>
      <issued>2004-01-08T18:48:52-05:00</issued>
      <id>tag:diveintomark.org,2004:3.3135</id>
      <created>2004-01-08T23:48:52Z</created>
      <summary type="text/plain">Specs are things with important-sounding words like &quot;W3C Recommendation&quot;, &quot;RFC 2616&quot;, or &quot;ISO 8879&quot; at the top.  Weblogs are things with cat pictures at the top.</summary>
      <dc:subject>Web services</dc:subject>
      <content type="text/html" mode="escaped" xml:lang="en" xml:base="http://diveintomark.org/archives/2004/01/08/postels-law">
        <![CDATA[<p>A sure sign that you have been ignoring your weblog is when <a href="http://bugzilla.mozilla.org/show_bug.cgi?id=109553">Mozilla bugs mentioned on your home page</a> get fixed between posts.  Mozilla 1.7 will support the <code>Content-Location:</code> HTTP header.  I would also like to thank <a href="http://ln.hixie.ch/">Ian Hickson</a> for reminding the world that <a href="http://bugzilla.mozilla.org/show_bug.cgi?id=109553#c13">the things I write in this weblog are not spec text</a>.  Apparently there has been some confusion about that.  Specs are things with important-sounding words like <q>W3C Recommendation</q>, <q>RFC 2616</q>, or <q>ISO 8879</q> at the top.  Weblogs are things with cat pictures at the top.  If specs are unclear, they should be fixed by things with <q>errata</q> at the top.  If weblogs are unclear... hey look, cat pictures.</p>
  
  <p>I was hoping to write a followup to my piece on <a href="http://diveintomark.org/archives/2004/01/02/relative-uris">relative URIs in HTML</a> called <q>Relative URIs in XML</q>, which would talk about <a href="http://www.w3.org/TR/xmlbase/">XML Base</a>, but I haven't yet.  It's important though; we're using it in Atom, so it would be nice if everybody knew how it worked.  A short little test suite would not be entirely out of line.  There's <a href="http://www.w3.org/XML/Test/">a comprehensive XML test suite</a> with over 2000 tests, which I believe includes a few tests for XML:Base, but it would be nice to have an Atom-specific one too to show exactly which elements and attributes in a feed can be or contain relative URIs.</p>
  
  <p>There have been a number of unhelpful suggestions recently on the Atom mailing list.  One suggestion was that in the Atom API, we should <q>just</q> use basic HTTP authentication over SSL.  This <a href="http://www.xml.com/pub/a/2003/12/17/dive.html">doesn't work for Bob</a>.  Don't talk to me about Atom authentication until you have read that article.  You can talk to me about man-in-the-middle attacks, you can talk to me about spoofing attacks, you can talk to me about DNS poisoning attacks, but <strong>you can not talk to me about alternatives that do not work for Bob</strong>.</p>
  
  <p>Another suggestion was that we do away with the Atom autodiscovery <code>&lt;link&gt;</code> element and <q>just</q> use an HTTP header, because parsing HTML is perceived as being hard and parsing HTTP headers is perceived as being simple.  This does not work for Bob either, because he has no way to set arbitrary HTTP headers.  It also ignores the fact that the HTML specification explicitly states that <a href="http://www.w3.org/TR/html401/struct/global.html#h-7.4.4.2">all HTTP headers can be replicated at the document level</a> with the <code>&lt;meta http-equiv="..."&gt;</code> element.  So instead of requiring clients to parse HTML, we should <q>just</q> require them to parse HTTP headers... and HTML.</p>
  
  <p>See also: <a href="http://c2.com/cgi/wiki?JustIsaDangerousWord">"just" is a dangerous word</a>, <a href="http://c2.com/cgi/wiki?AlarmBellPhrases">alarm bell phrases</a>.</p>
  
  <p>It also occurs to me that people who say parsing HTML is too hard probably aren't <a href="http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely">sanitizing embedded HTML properly</a>.  <a href="http://www.alpha-geek.com/2004/01/02/example_of_hard_to_parse_html.html">Do not parse HTML with regexes</a>.  Here's an <a href="http://diveintomark.org/public/2004/01/crazy_rss.xml">example feed that illustrates the problem</a>.  My <a href="http://diveintomark.org/projects/feed_parser/version_27.html">Ultraliberal Feed Parser 2.7</a> sanitizes this correctly, and in the true spirit of hacking, I have no sympathy for people who can't be bothered to write code I've already written.</p>
  
  <p>Another entire class of unhelpful suggestions that seems to pop up on a regular basis is unproductive mandates about how producers can produce Atom feeds, or how clients can consume them.  Things like <q>let's mandate that feeds can't use CDATA blocks</q> (runs contrary to the XML specification), or <q>let's mandate that feeds can't contain processing instructions</q> (technically possible, but to what purpose?), or <q>let's mandate that clients can only consume feeds with conforming XML parsers</q>.</p>
  
  <p>This last one is interesting, in that it tries to wish away Postel's Law (originally stated in <a href="http://www.ibiblio.org/pub/docs/rfc/rfc793.txt">RFC 793</a> as <q cite="http://www.ibiblio.org/pub/docs/rfc/rfc793.txt">be conservative in what you do, be liberal in what you accept from others</q>).  Various people have tried to mandate this principle out of existence, some going so far as to claim that <a href="http://essaysfromexodus.scripting.com/postelsLaw">Postel's Law should not apply to XML</a>, because (apparently) the three letters <q>X</q>, <q>M</q>, and <q>L</q> are a magical combination that signal a glorious revolution that somehow overturns the fundamental principles of interoperability.</p>
  
  <p>There are no exceptions to Postel's Law.  Anyone who tries to tell you differently is probably a client-side developer who wants the entire world to change so that their life might be 0.00001% easier.  The world doesn't work that way.</p>
  
  <p>I maintain a <a href="http://diveintomark.org/projects/feed_parser/">feed parser</a>.  Real people rely on it.  It is used in several end-user products, including <a href="http://www.osafoundation.org/Chandler-Product_Roadmap.htm">Chandler</a> and <a href="http://www.nongnu.org/straw/">Straw</a>, and lots of other people use it in their own homegrown aggregators.  It is as liberal as possible because that is what clients need to be.  It handles the 7 different versions of RSS seamlessly and equally.  It handles Atom.  It even goes so far as to try to to abstract away the differences between RSS and Atom, duplicating RSS elements into Atom fields and Atom elements into RSS fields, RSS 2.0 fields into RSS 1.0 fields, and so forth and so on.  If all you care about is title/link/description, you can get that from any feed, even a souped-up Atom feed.  If you want to use the more advanced content model of Atom, you can do that too, even from the most minimal RSS feed.</p>
  
  <p>My feed parser resolves relative links.  It maps non-standard elements to standard ones.  It parses 10 different types of dates and then normalizes them in case somebody claims their latest entry was last modified on June 31st.  It handles many common cases of non-well-formed XML, because many feeds contain XML well-formedness errors, even the feeds of people who should really know better, and a feed parsing library that can't parse the feeds that exist in the real world is simply a waste of everyone's time.  Don't whine to me that parsing feeds is hard.  I know how hard it is.</p>
  
  <p>I also help maintain a <a href="http://feedvalidator.org/">feed validator</a>, and I strongly advocate its use among producers.  It tickles me whenever I go to a new site and see a <q>valid RSS</q> or <q>valid Atom</q> banner.  I have spent an ungodly amount of time making the validator as easy to use as possible and also as strict as possible.  It has almost 1000 test cases backing up its rules; 300 of those were added in the last release alone.  It checks for June 31st.  I swear to God it does.  One day I was writing test cases and <a href="http://intertwingly.net/blog/">Sam</a> was writing code to pass them, and when he saw that test case fail he almost reached through his cable modem and strangled me.  He almost removed the test case out of spite.  He gave in and coded it anyway, and checked it in, and we deployed, and three days later I got a bug report from someone who couldn't figure out why his feed wasn't validating.  And I couldn't figure it out either, until he mentioned that it only seemed to choke on the date for one specific entry, and I looked at it one more time and I swear to God it said <q>2003-06-31</q>.</p>
  
  <p>There are no exceptions to Postel's Law.</p>]]>
        
      </content>
    </entry>
  
  </feed>
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestFeedParser.java
  
  Index: TestFeedParser.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution, if
   *    any, must include the following acknowlegement:  
   *       "This product includes software developed by the 
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowlegement may appear in the software itself,
   *    if and wherever such third-party acknowlegements normally appear.
   *
   * 4. The names "The Jakarta Project", "FeedParser", and "Apache Software
   *    Foundation" must not be used to endorse or promote products derived
   *    from this software without prior written permission. For written 
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache"
   *    nor may "Apache" appear in their names without prior written
   *    permission of the Apache Group.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   *
   * ====================================================================
   */
  
  package org.apache.commons.feedparser.test;
  
  import java.applet.*;
  import java.util.*;
  import java.io.*;
  import java.net.*;
  import java.security.*;
  
  import org.apache.lucene.index.*;
  import org.apache.lucene.search.*;
  import org.apache.lucene.queryParser.*;
  import org.apache.lucene.analysis.*;
  import org.apache.lucene.analysis.standard.*;
  import org.apache.lucene.document.*;
  
  import org.peerfear.newsmonster.tools.*;
  import org.peerfear.newsmonster.network.*;
  
  import junit.framework.*;
  
  import org.apache.commons.feedparser.*;
  import org.apache.commons.feedparser.impl.*;
  
  /**
   * Smoke test that parses an OPML subscription list and prints the title and
   * feed of every item the parser reports.  The resource is a hard-coded http
   * URL, so running the test presumably requires network access.
   *
   * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
   * @version $Id: TestFeedParser.java,v 1.1 2004/02/18 20:06:27 burton Exp $
   */
  public class TestFeedParser extends TestCase {

      /**
       * @param name test name handed to the JUnit TestCase constructor
       */
      public TestFeedParser( String name ) throws Exception {
          super( name );
      }

      /**
       * Parses the OPML resource with a listener that prints each item.
       *
       * @throws Exception if obtaining the resource or parsing it fails
       */
      public void testParseOPML() throws Exception {

          String resource = "http://weblog.infoworld.com/udell/gems/mySubscriptions.opml";

          FeedParser parser = FeedParserFactory.newFeedParser();

          FeedParserListener listener = new DefaultFeedDirectoryParserListener() {

                  public void onItem( FeedParserState state,
                                      String title,
                                      String weblog,
                                      String description,
                                      String feed ) throws FeedParserException {

                      // Label uses the same "name: value" form as the feed line.
                      System.out.println( "title: " + title );
                      System.out.println( "feed: " + feed );

                  }

                  // Nothing to do in this callback for this test.
                  public void onItemEnd() {}

                  // Nothing to do in this callback for this test.
                  public void finished() {}

              };

          listener.setContext( this );

          ResourceRequest request = ResourceRequestFactory.getResourceRequest( resource );

          parser.parse( listener, request.getInputStream() );

      }

      /**
       * Command-line entry point: runs {@link #testParseOPML()} directly and
       * prints the stack trace of anything it throws instead of propagating it.
       *
       * @param args ignored
       */
      public static void main( String[] args ) {

          try {

              TestFeedParser test = new TestFeedParser( null );

              test.testParseOPML();

          } catch ( Throwable t ) {

              t.printStackTrace();

          }

      }

  }
  
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestFeedParserUTF8.java
  
  Index: TestFeedParserUTF8.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution, if
   *    any, must include the following acknowlegement:  
   *       "This product includes software developed by the 
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowlegement may appear in the software itself,
   *    if and wherever such third-party acknowlegements normally appear.
   *
   * 4. The names "The Jakarta Project", "FeedParser", and "Apache Software
   *    Foundation" must not be used to endorse or promote products derived
   *    from this software without prior written permission. For written 
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache"
   *    nor may "Apache" appear in their names without prior written
   *    permission of the Apache Group.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   *
   * ====================================================================
   */
  
  package org.apache.commons.feedparser.test;
  
  import java.applet.*;
  import java.util.*;
  import java.io.*;
  import java.net.*;
  import java.security.*;
  
  import org.apache.lucene.index.*;
  import org.apache.lucene.search.*;
  import org.apache.lucene.queryParser.*;
  import org.apache.lucene.analysis.*;
  import org.apache.lucene.analysis.standard.*;
  import org.apache.lucene.document.*;
  
  import org.peerfear.newsmonster.tools.*;
  
  import junit.framework.*;
  
  import org.apache.commons.feedparser.*;
  import org.apache.commons.feedparser.impl.*;
  
  /**
   * Smoke test that feeds a local RSS file through the parser using a
   * {@link DebugFeedParserListener}.
   *
   * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
   * @version $Id: TestFeedParserUTF8.java,v 1.1 2004/02/18 20:06:27 burton Exp $
   */
  public class TestFeedParserUTF8 extends TestCase {

      /**
       * @param name test name handed to the JUnit TestCase constructor
       */
      public TestFeedParserUTF8( String name ) throws Exception {
          super( name );
      }

      /**
       * Parses the RSS fixture file and always closes the file stream
       * afterwards, whether or not parsing succeeds.
       *
       * @throws Exception if the file cannot be opened or parsing fails
       */
      public void testParse() throws Exception {

          FeedParser parser = FeedParserFactory.newFeedParser();

          FeedParserListener listener = new DebugFeedParserListener();

          // NOTE(review): this relative path does not match the package
          // directory this class lives in (org/apache/commons/feedparser/test);
          // confirm the fixture location and working directory.
          FileInputStream in = new FileInputStream( "src/java/ksa/test/TestFeedParserUTF8.rss" );

          try {

              parser.parse( listener, in );

          } finally {

              // Release the file handle even when parse() throws.
              in.close();

          }

      }

      /**
       * Command-line entry point: runs {@link #testParse()} directly and
       * prints the stack trace of anything it throws instead of propagating it.
       *
       * @param args ignored
       */
      public static void main( String[] args ) {

          try {

              TestFeedParserUTF8 test = new TestFeedParserUTF8( null );

              test.testParse();

          } catch ( Throwable t ) {

              t.printStackTrace();

          }

      }

  }
  
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestFeedParserUTF8.rss
  
  Index: TestFeedParserUTF8.rss
  ===================================================================
  <?xml version="1.0" encoding="UTF-8"?>
  
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:sy="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/" xmlns="http://purl.org/rss/1.0/">
  
  <channel rdf:about="http://soan.jp/">
  <title>Soanblog 創庵</title>
  <link>http://soan.jp/</link>
  <description>アジアンカフェ「&lt;a href=&quot;http://www.chakama.com/&quot;&gt;茶香間&lt;/a&gt;」も営業中！</description>
  <dc:language>en-us</dc:language>
  <dc:creator></dc:creator>
  <dc:date>2003-10-22T17:50:12+09:00</dc:date>
  <admin:generatorAgent rdf:resource="http://www.movabletype.org/?v=2.51" />
  
  <items>
  <rdf:Seq><rdf:li rdf:resource="http://soan.jp/archives/000508.html" />
  <rdf:li rdf:resource="http://soan.jp/archives/000507.html" />
  <rdf:li rdf:resource="http://soan.jp/archives/000505.html" />
  <rdf:li rdf:resource="http://soan.jp/archives/000504.html" />
  <rdf:li rdf:resource="http://soan.jp/archives/000503.html" />
  <rdf:li rdf:resource="http://soan.jp/archives/000502.html" />
  <rdf:li rdf:resource="http://soan.jp/archives/000500.html" />
  <rdf:li rdf:resource="http://soan.jp/archives/000499.html" />
  <rdf:li rdf:resource="http://soan.jp/archives/000498.html" />
  <rdf:li rdf:resource="http://soan.jp/archives/000497.html" />
  <rdf:li rdf:resource="http://soan.jp/archives/000485.html" />
  <rdf:li rdf:resource="http://soan.jp/archives/000496.html" />
  <rdf:li rdf:resource="http://soan.jp/archives/000495.html" />
  <rdf:li rdf:resource="http://soan.jp/archives/000494.html" />
  <rdf:li rdf:resource="http://soan.jp/archives/000491.html" />
  </rdf:Seq>
  </items>
  
  </channel>
  
  <item rdf:about="http://soan.jp/archives/000508.html">
  <title>手書きでサイト構築「デニム」</title>
  <link>http://soan.jp/archives/000508.html</link>
  <description>手書きでサイト構築をしていく構造化ツールGroup for User Interface Research - DENIMがバージョンアップしてました。 （とはいえ、今年の３月ですが・・・・古い） とりあえずメモ。～デモビデオ（RealPlayer）...</description>
  <dc:subject>SOFTWARE</dc:subject>
  <dc:creator>hirosh</dc:creator>
  <dc:date>2003-10-22T17:50:12+09:00</dc:date>
  </item>
  <item rdf:about="http://soan.jp/archives/000507.html">
  <title>話しながら作るプレゼンテーションソフト</title>
  <link>http://soan.jp/archives/000507.html</link>
  <description>ことだま タブレットＰＣを使い、音声入力と文字認識をバックグラウンドで使いながら、プレゼン資料を作成するソフトウェア。 音声や文字認識による入力面での革新性だけでなく、位置関係や矢印などの関係性などもすべてドラックするだけで表記され、まさに話をしながら、構造をくみあげていけます。（・・・こういうアイデアってワクワクします。） 詳しくは、デモビデオ（３０Ｍ）にとてもわかりやすく解説されています。 早速タブレットＰＣに入れて活用してレビューしてみたいと思います。...</description>
  <dc:subject>SOFTWARE</dc:subject>
  <dc:creator>hirosh</dc:creator>
  <dc:date>2003-10-22T17:40:08+09:00</dc:date>
  </item>
  <item rdf:about="http://soan.jp/archives/000505.html">
  <title>赤坂うまや</title>
  <link>http://soan.jp/archives/000505.html</link>
  <description>赤坂うまやでＫＮＮ神田さんのイベント。 スケジュール的に行けそうにない予定だったのですが、ぎりぎり間に合い（いや、間に合ってないか）なんとか合流しました。 この赤坂のうまや。ロケーションがなんと神社の鳥居をくぐって、裏道のようなどんつきにあり、まるで神社の境内にあるバーのような演出で風情があって粋でした。 （写真は玄関入ってすぐのところ） イベントに足を踏み入れたときは、会場もすでに出来上がっていて、なかなかの盛り上がりようでした。 その後、家に送りがてら、外苑前のカフェ「オフィス」で、ちょっと朝３時くらいまで語らいあってました。 最近夜遊びがすぎるかな・・・...</description>
  <dc:subject>CAFE</dc:subject>
  <dc:creator>hirosh</dc:creator>
  <dc:date>2003-10-22T03:38:47+09:00</dc:date>
  </item>
  <item rdf:about="http://soan.jp/archives/000504.html">
  <title>TILES CAFE</title>
  <link>http://soan.jp/archives/000504.html</link>
  <description>今日はひさびさにＮＨＫの西門の前にある、Tiles Cafeにいきました。 あいかわらず、ここの角煮や焼きそば、ゴーヤチャンプルは美味しい！気のおけない知人と語らうにはホントいい場所です。 カフェ作るときに、何度も行ってはお店の人にさりげなくインタビューして勉強させてもらった場所。 オススメです。 〒150-0047 東京都渋谷区神山町5-3 スパイシービル1F 03-5453-1288...</description>
  <dc:subject>CAFE</dc:subject>
  <dc:creator>hirosh</dc:creator>
  <dc:date>2003-10-21T01:51:19+09:00</dc:date>
  </item>
  <item rdf:about="http://soan.jp/archives/000503.html">
  <title>コメント欄を狙ったスパムの対策</title>
  <link>http://soan.jp/archives/000503.html</link>
  <description>最近、ＭＴのコメントを狙ったコメントスパムや、ＭＴのコメント欄にあるメールアドレスを抜き取って、ダイレクトメールを送るのが増えているようです。 ■コメントスパム コメントスパムは、来るたびに、kengoさんとメッセンジャーで共有してますが、kengoさんが詳しくまとめてくれています。 ★Preston Wiki - コメントスパム こちらにあるＩＰアドレスは、このサイトにくるものとほとんど一緒なので、ＭＴの設定の「禁止ＩＰ」に入れておくといいと思います。 ■スパム（コメントに書いたメールアドレスが取られる） こちらは、ＭＴに限らず、ウェブサイトに自分のアドレスを書き込んだものが、スパム業者に勝手にもっていかれ、見知らぬ海外のダイレクトメールなどを受け取るというもの。 自衛手段として、いつでも変えられるフリーのメールアドレス（hotmailやyahooメールなど）で書き込む（これもイタチごっこになってしまいますが）がいいと思います。 また、ＭＴの設定側での対策として、テンプレートのコメントテンプレートの設定にある、posted by ～のところにあるＭＴタグを ＜$MTCommentAuthorLink spam_protect=&quot;1&quot; show_email=&quot;0&quot;$＞ にしておくと、メールアドレスのみの投稿者のアドレスを非表示にすることができます。 最近その手のＤＭを受け取る人が増えているようなので、メールアドレスを非表示にしました。 コメントスパムも、メールのスパムや、トラックバックを狙ったものなど色々あると思いますが、せっかく楽しく広がりつつあるブロッグにあまり歓迎されるものではないですよね。被害が増えないことを祈ります。...</description>
  <dc:subject>MOVABLETYPE</dc:subject>
  <dc:creator>hirosh</dc:creator>
  <dc:date>2003-10-20T00:01:11+09:00</dc:date>
  </item>
  <item rdf:about="http://soan.jp/archives/000502.html">
  <title>LOOPS001</title>
  <link>http://soan.jp/archives/000502.html</link>
  <description>LOOPS001 130 bpm [REASON + ACID] Sampling with Rhodes Pad. HOUSE...</description>
  <dc:subject>OTHER</dc:subject>
  <dc:creator>hirosh</dc:creator>
  <dc:date>2003-10-19T14:41:51+09:00</dc:date>
  </item>
  <item rdf:about="http://soan.jp/archives/000500.html">
  <title>花粉症</title>
  <link>http://soan.jp/archives/000500.html</link>
  <description>突然、昨日鼻水とくしゃみがとまらなくなり、目がかゆくなる症状が・・・花粉症か？！季節外れのような・・・・気のせいでありますように。 花粉症＊ナビ ― 花粉症カレンダー...</description>
  <dc:subject>DIARY</dc:subject>
  <dc:creator>hirosh</dc:creator>
  <dc:date>2003-10-17T13:53:17+09:00</dc:date>
  </item>
  <item rdf:about="http://soan.jp/archives/000499.html">
  <title>iTunes for windows</title>
  <link>http://soan.jp/archives/000499.html</link>
  <description>Apple - iTunesを早速ダウンロード。 いい！でもCOULDさんも言ってましたが、たしかにMacに移行する理由がひとつ減ってしまったのは複雑な気分・・ WinAmpだってWMPだって、RealOneだって、mp3プレーヤーはあるのに、なぜかこれが一番使い勝手よく感じます。（マック贔屓だからというのもあるけど） ＰＣに入っているのは、同じmp3ファイルなのに、アプリケーションを変えただけで、新たな出会いを感じさせてくれる・・。インターフェイスの効能でしょうか。...</description>
  <dc:subject>SOFTWARE</dc:subject>
  <dc:creator>hirosh</dc:creator>
  <dc:date>2003-10-17T13:29:24+09:00</dc:date>
  </item>
  <item rdf:about="http://soan.jp/archives/000498.html">
  <title>[FLASH]紙的な映像</title>
  <link>http://soan.jp/archives/000498.html</link>
  <description>WEFAIL　[via pya!] 不思議な紙ムービー表現。 こちらも同じくpya!さんから・・PENTRIX 新しいマトリックスムービーかと思いきや・・・...</description>
  <dc:subject>FAN WEBSITE</dc:subject>
  <dc:creator>hirosh</dc:creator>
  <dc:date>2003-10-17T00:37:35+09:00</dc:date>
  </item>
  <item rdf:about="http://soan.jp/archives/000497.html">
  <title>[PHOTO FRIDAY] Translucent</title>
  <link>http://soan.jp/archives/000497.html</link>
  <description> Photo Friday - The Weekly Photo Challenge is &apos;Translucent&apos;. いきなり大雨が降って、雨宿り中。...</description>
  <dc:subject>PHOTO FRIDAY</dc:subject>
  <dc:creator>hirosh</dc:creator>
  <dc:date>2003-10-16T14:34:20+09:00</dc:date>
  </item>
  <item rdf:about="http://soan.jp/archives/000485.html">
  <title>クサイせりふジェネレーター</title>
  <link>http://soan.jp/archives/000485.html</link>
  <description>クサイせりふジェネレータ とっさに愛の告白をしなくてはならないときに、携帯で検索。 （ひさびさに再発見）...</description>
  <dc:subject>FAN WEBSITE</dc:subject>
  <dc:creator>hirosh</dc:creator>
  <dc:date>2003-10-15T21:28:37+09:00</dc:date>
  </item>
  <item rdf:about="http://soan.jp/archives/000496.html">
  <title>奇妙な楽譜</title>
  <link>http://soan.jp/archives/000496.html</link>
  <description>Score Gallery～芸術的な楽譜。 音を視覚的に記述していくと、こんな記号になるのか・・。...</description>
  <dc:subject>FAN WEBSITE</dc:subject>
  <dc:creator>hirosh</dc:creator>
  <dc:date>2003-10-15T14:18:33+09:00</dc:date>
  </item>
  <item rdf:about="http://soan.jp/archives/000495.html">
  <title>TVチューナ内蔵携帯電話[NEC]</title>
  <link>http://soan.jp/archives/000495.html</link>
  <description>NEC、初のTVチューナ内蔵携帯電話を12月に市場導入～とうとう出ますね。テレビが見られる携帯電話。 外部入力もあるとのこと。期待大。...</description>
  <dc:subject>GADGET</dc:subject>
  <dc:creator>hirosh</dc:creator>
  <dc:date>2003-10-15T03:26:49+09:00</dc:date>
  </item>
  <item rdf:about="http://soan.jp/archives/000494.html">
  <title>FLASH不思議な世界「チェコアニメ」</title>
  <link>http://soan.jp/archives/000494.html</link>
  <description>知人に教わりました、SAMOROSTというＦＬＡＳＨサイト。Czech Republic（チェコ共和国）のアニメーション。 チェコアニメって世界観がとても好きで、独特の雰囲気を感じさせてくれます。 （色々なところクリックして、先に進んでみてください・・・パズル感覚で楽しいです） そういえば、以前「イタチョコ・システム」（iTachoco System）の作品をマックのフロッピーで買ったことがあり、近い世界観を感じます。・・・（イタチョコのチョコはチェコだったり・・・？勝手な想像ですみません。） もうちょっと明るくなったのがティムバートンでしょうか。 チェコアニメ作品　ビデオ・DVD案内...</description>
  <dc:subject>FAN WEBSITE</dc:subject>
  <dc:creator>hirosh</dc:creator>
  <dc:date>2003-10-15T02:39:29+09:00</dc:date>
  </item>
  <item rdf:about="http://soan.jp/archives/000491.html">
  <title>TRPGカフェ</title>
  <link>http://soan.jp/archives/000491.html</link>
  <description>屋上暇人倶楽部: TABLETALK CAFE...</description>
  <dc:subject>CAFE</dc:subject>
  <dc:creator>hirosh</dc:creator>
  <dc:date>2003-10-09T13:28:08+09:00</dc:date>
  </item>
  
  
  </rdf:RDF>
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org