You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by bu...@apache.org on 2004/04/15 18:58:15 UTC

cvs commit: jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate LinkLocator.java

burton      2004/04/15 09:58:15

  Added:       feedparser/src/java/org/apache/commons/feedparser/locate
                        LinkLocator.java
  Log:
  more feedlocator support...
  
  Revision  Changes    Path
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/LinkLocator.java
  
  Index: LinkLocator.java
  ===================================================================
  /*
   * Copyright 1999,2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.commons.feedparser.locate;
  
  import java.io.*;
  import java.util.*;
  import java.net.*;
  
  /**
   * Finds links within a page that look like feeds belonging to the same site
   * as the page itself.  Candidate links are recognised purely by their URL
   * suffix (see {@link #locate}).
   *
   * @author <a href="mailto:burton@apache.org">Kevin A. Burton</a>
   */
  public class LinkLocator {

      /**
       * Scans the anchors found in <code>content</code> and collects those that
       * look like feeds hosted on the same site as <code>resource</code>.
       *
       * <p>Each href is expanded against <code>resource</code> with
       * <code>ResourceExpander.expand</code>.  An expanded link is kept when it
       * is on the same site as <code>resource</code> (compared with
       * {@link #getSite}), does not start with <code>http://127</code>, has not
       * already been seen during this call, and ends with <code>.xml</code>,
       * <code>.rss</code>, <code>.rdf</code> or <code>/node/feed</code>.
       *
       * @param resource URL of the page being scanned; used as the base for
       *                 expanding hrefs and to decide which links are on-site.
       * @param content  the text handed to <code>AnchorParser.parseAnchors</code>
       *                 (presumably the HTML of the page -- confirm against
       *                 AnchorParser).
       * @param list     list the matching URLs (Strings) are appended to; may be
       *                 <code>null</code>, in which case a new list is created.
       * @return the list holding the located feed URLs: <code>list</code> itself
       *         when it was non-null, otherwise a newly created list.  Never
       *         <code>null</code>.
       * @throws Exception whatever <code>AnchorParser.parseAnchors</code> throws.
       *
       * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
       */
      public static final List locate( String resource, String content, List list )
          throws Exception {

          //Collect into the caller's list when one was supplied so that both the
          //argument and the return value carry the results.
          final List result = ( list != null ) ? list : new ArrayList();

          AnchorParserListener listener = new AnchorParserListener() {

                  //URL of the page being scanned; set via setContext
                  String resource = null;

                  //normalised host of the page being scanned
                  String site = null;

                  //expanded links already examined during this parse
                  HashSet seen = new HashSet();

                  public void setContext( Object context ) {

                      resource = (String)context;

                      //pass in the resource of the blog
                      site = getSite( resource );

                  }

                  public Object getResult() {
                      return result;
                  }

                  //NOTE(review): always returns true; presumably this tells the
                  //parser to keep going -- confirm against AnchorParserListener.
                  public boolean onAnchor( String href, String rel, String title ) {

                      String current = ResourceExpander.expand( resource, href );

                      if ( current == null ) {
                          return true; //obviously not
                      }

                      //getSite returns null for a malformed URL; treat that the
                      //same as a link on a different site instead of failing
                      //with a NullPointerException.
                      String currentSite = getSite( current );

                      if ( currentSite == null || ! currentSite.equals( site ) ) {
                          return true;
                      }

                      //Radio style feed served from a 127.x address; skip it.
                      if ( current.startsWith( "http://127" ) ) {
                          return true;
                      }

                      //add() reports false when the link was already present
                      if ( ! seen.add( current ) ) {
                          return true;
                      }

                      //FIXME: we should assert that these feeds are from the
                      //SAME domain, not a link to another feed.

                      if ( current.endsWith( ".xml" ) ||
                           current.endsWith( ".rss" ) ||
                           current.endsWith( ".rdf" ) ) {

                          //feed for this blog.
                          result.add( current );
                          return true;

                      }

                      //for coderman's blog at http://www.peertech.org
                      if ( current.endsWith( "/node/feed" ) ) {
                          result.add( current );
                      }

                      return true;

                  }

              };

          listener.setContext( resource );
          AnchorParser.parseAnchors( content, listener );

          return result;

      }

      /**
       * Returns the host name of the given URL with a leading
       * <code>www.</code> removed, so that <code>http://www.example.com/a</code>
       * and <code>http://example.com/b</code> yield the same site.
       *
       * @param resource the URL to examine.
       * @return the host without a leading <code>www.</code>, or
       *         <code>null</code> when <code>resource</code> cannot be parsed as
       *         a URL.
       */
      public static String getSite( String resource ) {

          try {

              //getHost() yields only the host name, without any scheme, so the
              //prefix to strip is "www." rather than "http://www".
              String host = new URL( resource ).getHost();

              if ( host.startsWith( "www." ) ) {
                  return host.substring( "www.".length() );
              }

              return host;

          } catch ( MalformedURLException e ) {
              //not a parsable URL: callers treat null as "no site"
              return null;
          }

      }

  }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org