You are viewing a plain text version of this content. The canonical link for it is here.

Posted to dev@commons.apache.org by Brad Neuberg <bk...@columbia.edu> on 2004/09/04 01:32:48 UTC

[feedparser] Attaching patch again

It looks like attachments are stripped out of the mailing list; here is the 
patch in unified diff format:

? compile
? feedparser.patch
? lib
Index: src/java/org/apache/commons/feedparser/locate/ProbeLocator.java
===================================================================
RCS file: 
/home/cvspublic/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java,v
retrieving revision 1.10
diff -u -B -r1.10 ProbeLocator.java
--- src/java/org/apache/commons/feedparser/locate/ProbeLocator.java     2 
Sep 2004 00:36:25 -0000       1.10
+++ src/java/org/apache/commons/feedparser/locate/ProbeLocator.java     3 
Sep 2004 23:27:52 -0000
@@ -69,7 +69,7 @@
      /**
       * A regex to find any trailing filename and strip it
       */
-    private static Pattern patternToStrip = Pattern.compile("/\\w*\\.\\w*$");
+    private static Pattern patternToStrip = 
Pattern.compile("[^/](/\\w*\\.\\w*$)");

      /**
       * A regex to extract the user from a Xanga URL
@@ -143,7 +143,8 @@
                new FeedReference("index.rss", FeedReference.RSS_MEDIA_TYPE),
                new FeedReference("rss.xml", FeedReference.RSS_MEDIA_TYPE),
                new FeedReference("index.rdf", FeedReference.RSS_MEDIA_TYPE),
-              new FeedReference("index.xml", FeedReference.XML_MEDIA_TYPE) };
+              new FeedReference("index.xml", FeedReference.RSS_MEDIA_TYPE),
+              new FeedReference("xml/rss.xml", 
FeedReference.RSS_MEDIA_TYPE) };

          probeMapping.put( BlogService.BLOSXOM,        blosxomLocations );

@@ -192,6 +193,7 @@

                  mapping = (FeedReference[])probeMapping.get( blogService );
                  log.info( "mapping = " + mapping );
+                log.info( "baseFeedPath = " + baseFeedPath );

                  // try out each mapping
                  for (int i = 0; i < mapping.length; i++) {
@@ -285,7 +287,9 @@
       *  and gets the path necessary to build up a feed, such as
       *  "http://www.codinginparadise.org/".  Basicly it appends a slash 
to the end if there
       *  is not one, and removes any file names that might be at the end, 
such as
-     *  "myweblog.php".  The one special exception is for some Blosxom blogs,
+     *  "myweblog.php".
+     *
+     *  There is a special exception for some Blosxom blogs,
       *  which have things inside of a cgi-script and 'hang' their RSS files
       *  off of this cgi-bin.  For example,
       *  http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi has its RSS file
@@ -301,20 +305,22 @@
          throws MalformedURLException {

          // strip off any query string or anchors
-        int startStripAt = resource.indexOf("#");
-
-        if ( startStripAt == -1 ) {
-               startStripAt = resource.indexOf("?");
-        }
-        if (startStripAt != -1) {
-            resource = resource.substring(0, startStripAt);
+        if ( resource.indexOf("#") != -1 ) {
+            resource = resource.substring( 0, resource.indexOf("#") );
+        }
+        else if ( resource.indexOf("?") != -1 ) {
+            resource = resource.substring( 0, resource.indexOf("?") );
          }

          if (!resource.endsWith("blosxom.cgi")) {
              Matcher fileMatcher = patternToStrip.matcher(resource);
-            resource = fileMatcher.replaceAll("");
+            if (fileMatcher.find()) {
+                String stringToStrip = fileMatcher.group(1);
+                int startStrip = resource.indexOf(stringToStrip);
+                resource = resource.substring(0, startStrip);
+            }
          }
-
+
          if ( !resource.endsWith( "/" ) ) {
              resource = resource + "/";
          }
@@ -329,13 +335,8 @@
       *  @author Brad Neuberg, bkn3@columbia.edu
       */
      protected static boolean feedExists(String resource) throws Exception {
-
-        log.info( "Inside feedExists, resource = "+resource );
-
          ResourceRequest request = 
ResourceRequestFactory.getResourceRequest( resource );

-        log.info( "request = "+request );
-
          request.setRequestMethod( "HEAD" );
          request.setFollowRedirects( false );

@@ -343,7 +344,7 @@
          request.getContentLength();

          long response = request.getResponseCode();
-        log.info( "response = " + response );
+
          return response == 200;
      }

Index: src/java/org/apache/commons/feedparser/test/TestProbeLocator.java
===================================================================
RCS file: 
/home/cvspublic/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestProbeLocator.java,v
retrieving revision 1.2
diff -u -B -r1.2 TestProbeLocator.java
--- src/java/org/apache/commons/feedparser/test/TestProbeLocator.java   2 
Sep 2004 00:36:25 -0000       1.2
+++ src/java/org/apache/commons/feedparser/test/TestProbeLocator.java   3 
Sep 2004 23:27:53 -0000
@@ -249,81 +249,8 @@
          FeedReference feeds[] = null;
          FeedReference rssFeed, atomFeed;

-//         // This site should have an RSS feed
-//         resource = "http://www.xanga.com/home.aspx?user=lithium98";
-//         content = getContent(resource);
-//         assertNotNull(content);
-//         blogService = BlogServiceDiscovery.discover(resource, content);
-//         assertEquals(blogService, BlogService.XANGA);
-//         list = new FeedList();
-//         ProbeLocator.locate(resource, content, list);
-//         assertEquals(list.size(), 1);
-//         feeds = (FeedReference[])list.toArray(new 
FeedReference[list.size()]);
-//         assertEquals(feeds.length, 1);
-//         assertEquals(feeds[0].method, 
FeedReference.METHOD_PROBE_DISCOVERY);
-//         assertNull(feeds[0].title, null);
-//         assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
-//         assertEquals(feeds[0].resource, 
"http://www.xanga.com/rss.aspx?user=lithium98");
-//         /* test through the FeedLocator */
-//         list = FeedLocator.locate(resource);
-//         atomFeed = list.getAdAtomFeed();
-//         rssFeed = list.getAdRSSFeed();
-//         assertNull(atomFeed);
-//         assertNotNull(rssFeed);
-//         assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
-//         assertEquals(rssFeed.resource, 
"http://www.xanga.com/rss.aspx?user=lithium98");
-
-//         //  This site should have an RSS feed
-//         resource = "http://www.xanga.com/home.aspx?user=ChUnSA_86";
-//         content = getContent(resource);
-//         assertNotNull(content);
-//         blogService = BlogServiceDiscovery.discover(resource, content);
-//         assertEquals(blogService, BlogService.XANGA);
-//         list = new FeedList();
-//         ProbeLocator.locate(resource, content, list);
-//         assertEquals(list.size(), 1);
-//         feeds = (FeedReference[])list.toArray(new 
FeedReference[list.size()]);
-//         assertEquals(feeds.length, 1);
-//         assertEquals(feeds[0].method, 
FeedReference.METHOD_PROBE_DISCOVERY);
-//         assertNull(feeds[0].title, null);
-//         assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
-//         assertEquals(feeds[0].resource, 
"http://www.xanga.com/rss.aspx?user=ChUnSA_86");
-//         /* test through the FeedLocator */
-//         list = FeedLocator.locate(resource);
-//         atomFeed = list.getAdAtomFeed();
-//         rssFeed = list.getAdRSSFeed();
-//         assertNull(atomFeed);
-//         assertNotNull(rssFeed);
-//         assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
-//         assertEquals(rssFeed.resource, 
"http://www.xanga.com/rss.aspx?user=ChUnSA_86");
-
-//         //  This site should have an RSS feed
-//         resource = "http://www.xanga.com/home.aspx?user=wdfphillz";
-//         content = getContent(resource);
-//         assertNotNull(content);
-//         blogService = BlogServiceDiscovery.discover(resource, content);
-//         assertEquals(blogService, BlogService.XANGA);
-//         list = new FeedList();
-//         ProbeLocator.locate(resource, content, list);
-//         assertEquals(list.size(), 1);
-//         feeds = (FeedReference[])list.toArray(new 
FeedReference[list.size()]);
-//         assertEquals(feeds.length, 1);
-//         assertEquals(feeds[0].method, 
FeedReference.METHOD_PROBE_DISCOVERY);
-//         assertNull(feeds[0].title, null);
-//         assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
-//         assertEquals(feeds[0].resource, 
"http://www.xanga.com/rss.aspx?user=wdfphillz");
-//         /* test through the FeedLocator */
-//         list = FeedLocator.locate(resource);
-//         atomFeed = list.getAdAtomFeed();
-//         rssFeed = list.getAdRSSFeed();
-//         assertNull(atomFeed);
-//         assertNotNull(rssFeed);
-//         assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
-//         assertEquals(rssFeed.resource, 
"http://www.xanga.com/rss.aspx?user=wdfphillz");
-
          // This site should have an RSS feed
-
-        resource = "http://xanga.com/home.aspx?user=joe";
+        resource = "http://www.xanga.com/home.aspx?user=lithium98";
          content = getContent(resource);
          assertNotNull(content);
          blogService = BlogServiceDiscovery.discover(resource, content);
@@ -336,7 +263,7 @@
          assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
          assertNull(feeds[0].title, null);
          assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
-        assertEquals(feeds[0].resource, "http://xanga.com/rss.aspx?user=joe");
+        assertEquals(feeds[0].resource, 
"http://www.xanga.com/rss.aspx?user=lithium98");
          /* test through the FeedLocator */
          list = FeedLocator.locate(resource);
          atomFeed = list.getAdAtomFeed();
@@ -344,7 +271,82 @@
          assertNull(atomFeed);
          assertNotNull(rssFeed);
          assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
-        assertEquals(rssFeed.resource, "http://xanga.com/rss.aspx?user=joe");
+        assertEquals(rssFeed.resource, 
"http://www.xanga.com/rss.aspx?user=lithium98");
+
+        // This site should have an RSS feed
+        resource = "http://www.xanga.com/home.aspx?user=ChUnSA_86";
+        content = getContent(resource);
+        assertNotNull(content);
+        blogService = BlogServiceDiscovery.discover(resource, content);
+        assertEquals(blogService, BlogService.XANGA);
+        list = new FeedList();
+        ProbeLocator.locate(resource, content, list);
+        assertEquals(list.size(), 1);
+        feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
+        assertEquals(feeds.length, 1);
+        assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
+        assertNull(feeds[0].title, null);
+        assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
+        assertEquals(feeds[0].resource, 
"http://www.xanga.com/rss.aspx?user=ChUnSA_86");
+        /* test through the FeedLocator */
+        list = FeedLocator.locate(resource);
+        atomFeed = list.getAdAtomFeed();
+        rssFeed = list.getAdRSSFeed();
+        assertNull(atomFeed);
+        assertNotNull(rssFeed);
+        assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
+        assertEquals(rssFeed.resource, 
"http://www.xanga.com/rss.aspx?user=ChUnSA_86");
+
+        // This site should have an RSS feed
+        resource = "http://www.xanga.com/home.aspx?user=wdfphillz";
+        content = getContent(resource);
+        assertNotNull(content);
+        blogService = BlogServiceDiscovery.discover(resource, content);
+        assertEquals(blogService, BlogService.XANGA);
+        list = new FeedList();
+        ProbeLocator.locate(resource, content, list);
+        assertEquals(list.size(), 1);
+        feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
+        assertEquals(feeds.length, 1);
+        assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
+        assertNull(feeds[0].title, null);
+        assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
+        assertEquals(feeds[0].resource, 
"http://www.xanga.com/rss.aspx?user=wdfphillz");
+        /* test through the FeedLocator */
+        list = FeedLocator.locate(resource);
+        atomFeed = list.getAdAtomFeed();
+        rssFeed = list.getAdRSSFeed();
+        assertNull(atomFeed);
+        assertNotNull(rssFeed);
+        assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
+        assertEquals(rssFeed.resource, 
"http://www.xanga.com/rss.aspx?user=wdfphillz");
+
+        // This site should have an RSS feed
+        // FIXME: We should be able to pass this test when we
+        // expand resources inside of the Feed Parser; we don't
+        // currently do this yet, Brad Neuberg, bkn3@columbia.edu
+        /*resource = "http://xanga.com/home.aspx?user=joe";
+        content = getContent(resource);
+        assertNotNull(content);
+        blogService = BlogServiceDiscovery.discover(resource, content);
+        assertEquals(blogService, BlogService.XANGA);
+        list = new FeedList();
+        ProbeLocator.locate(resource, content, list);
+        assertEquals(list.size(), 1);
+        feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
+        assertEquals(feeds.length, 1);
+        assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
+        assertNull(feeds[0].title, null);
+        assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
+        assertEquals(feeds[0].resource, 
"http://xanga.com/rss.aspx?user=joe");*/
+        /* test through the FeedLocator */
+        /*list = FeedLocator.locate(resource);
+        atomFeed = list.getAdAtomFeed();
+        rssFeed = list.getAdRSSFeed();
+        assertNull(atomFeed);
+        assertNotNull(rssFeed);
+        assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
+        assertEquals(rssFeed.resource, 
"http://xanga.com/rss.aspx?user=joe");*/
      }

      public FeedList test( String resource,
@@ -375,33 +377,6 @@

      }

-    public static void main( String[] args ) throws Exception {
-
-        TestProbeLocator test = new TestProbeLocator( null );
-
-        test.test( "http://xanga.com/home.aspx?user=joe",
-                   BlogService.XANGA,
-                   1 );
-
-        test.test( "http://www.xanga.com/home.aspx?user=joe",
-                   BlogService.XANGA,
-                   1 );
-
-        /*test.testBlogger();
-        test.testLiveJournal();
-        test.testDiaryLand();
-        test.testMovableType();*/
-        //test.testXanga();
-        /*test.testWordPress();
-        test.testAOLJournal();
-        test.testTypePad();
-        test.testGreyMatter();
-        test.testPMachine();
-        test.testBlosxom();
-        test.testRadioUserland();
-        test.testTextPattern();*/
-    }
-
      public void testWordPress() throws Exception {
          FeedList list = null;
          String resource = null, content = null;
@@ -756,6 +731,30 @@
          assertNotNull(rssFeed);
          assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
          assertEquals(rssFeed.resource, 
"http://www.mondfish.net/pmachine/index.xml");
+
+        // This site should have a single RSS feed
+        resource = "http://bamph.com";
+        content = getContent(resource);
+        assertNotNull(content);
+        blogService = BlogServiceDiscovery.discover(resource, content);
+        assertEquals(blogService, BlogService.UNKNOWN);
+        list = new FeedList();
+        ProbeLocator.locate(resource, content, list);
+        assertEquals(list.size(), 1);
+        feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
+        assertEquals(feeds.length, 1);
+        assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
+        assertNull(feeds[0].title, null);
+        assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
+        assertEquals(feeds[0].resource, "http://bamph.com/index.xml");
+        /* test through the FeedLocator */
+        list = FeedLocator.locate(resource);
+        atomFeed = list.getAdAtomFeed();
+        rssFeed = list.getAdRSSFeed();
+        assertNull(atomFeed);
+        assertNotNull(rssFeed);
+        assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
+        assertEquals(rssFeed.resource, "http://bamph.com/index.xml");
      }

      public void testBlosxom() throws Exception {
@@ -1002,5 +1001,31 @@
          return results.toString();
      }

+    public static void main( String[] args ) throws Exception {
+
+        TestProbeLocator test = new TestProbeLocator( null );
+
+        /*test.test( "http://xanga.com/home.aspx?user=joe",
+                   BlogService.XANGA,
+                   1 );
+
+        test.test( "http://www.xanga.com/home.aspx?user=joe",
+                   BlogService.XANGA,
+                   1 );*/
+
+        test.testBlogger();
+        test.testLiveJournal();
+        test.testDiaryLand();
+        test.testMovableType();
+        test.testXanga();
+        test.testWordPress();
+        test.testAOLJournal();
+        test.testTypePad();
+        test.testGreyMatter();
+        test.testPMachine();
+        test.testBlosxom();
+        test.testRadioUserland();
+        test.testTextPattern();
+    }
  }

Re: [feedparser] Attaching patch again

Posted by Brad Neuberg <br...@yahoo.com>.

That regular expression is meant to catch things like
the following:

http://www.somehost.com/blog/blosxom.cgi

We want to strip off the trailing filename so that we
just get the path.

I added the test for the double slashes because I was
finding that the expression was matching the
following, which it shouldn't:

http://somehost.com

If someone puts a double slash for the first example:

http://somehost.com/blog//blosxom.cgi

Then it would prevent this from matching, which is a
bug; thanks for finding that.

I think I need to change the regex to not match if the
section in parentheses is preceded by a colon and two
slashes.  What do you think?

Hope you're having a good weekend,
  Brad

--- "Kevin A. Burton" <bu...@newsmonster.org> wrote:

> Brad Neuberg wrote:
> 
> > +    private static Pattern patternToStrip = 
> > Pattern.compile("[^/](/\\w*\\.\\w*$)");
> >
> Brad...
> 
> Won't the above regexp prevent:
> 
> http://foo.com//bar
> 
> from matching?
> 
> Usually the HTTP server will just do a 302 redir
> when using two slashes.
> 
> >      /**
> >       * A regex to extract the user from a Xanga
> URL
> > @@ -143,7 +143,8 @@
> >                new FeedReference("index.rss", 
> > FeedReference.RSS_MEDIA_TYPE),
> >                new FeedReference("rss.xml", 
> > FeedReference.RSS_MEDIA_TYPE),
> >                new FeedReference("index.rdf", 
> > FeedReference.RSS_MEDIA_TYPE),
> > -              new FeedReference("index.xml", 
> > FeedReference.XML_MEDIA_TYPE) };
> > +              new FeedReference("index.xml", 
> > FeedReference.RSS_MEDIA_TYPE),
> 
> Wondering if we should have a new media type...
> POTENTIAL_RSS_MEDIA_TYPE 
> ... this way we can just note that this MIGHT be a
> feed.
> 
> Other than that looks good. 
> 
> -- 
> 
> Please reply using PGP.
> 
>     http://peerfear.org/pubkey.asc    
>     
>     NewsMonster - http://www.newsmonster.org/
>     
> Kevin A. Burton, Location - San Francisco, CA, Cell
> - 415.595.9965
>        AIM/YIM - sfburtonator,  Web -
> http://peerfear.org/
> GPG fingerprint: 5FB2 F3E2 760E 70A8 6174 D393 E84D
> 8D04 99F1 4412
>   IRC - freenode.net #infoanarchy | #p2p-hackers |
> #newsmonster
> 
> 
>
---------------------------------------------------------------------
> To unsubscribe, e-mail:
> commons-dev-unsubscribe@jakarta.apache.org
> For additional commands, e-mail:
> commons-dev-help@jakarta.apache.org
> 
> 


---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org

Re: [feedparser] Attaching patch again

Posted by "Kevin A. Burton" <bu...@newsmonster.org>.

Brad Neuberg wrote:

> +    private static Pattern patternToStrip = 
> Pattern.compile("[^/](/\\w*\\.\\w*$)");
>
Brad...

Won't the above regexp prevent:

http://foo.com//bar

from matching?

Usually the HTTP server will just issue a 302 redirect when the URL contains two slashes.

>      /**
>       * A regex to extract the user from a Xanga URL
> @@ -143,7 +143,8 @@
>                new FeedReference("index.rss", 
> FeedReference.RSS_MEDIA_TYPE),
>                new FeedReference("rss.xml", 
> FeedReference.RSS_MEDIA_TYPE),
>                new FeedReference("index.rdf", 
> FeedReference.RSS_MEDIA_TYPE),
> -              new FeedReference("index.xml", 
> FeedReference.XML_MEDIA_TYPE) };
> +              new FeedReference("index.xml", 
> FeedReference.RSS_MEDIA_TYPE),

Wondering if we should have a new media type... POTENTIAL_RSS_MEDIA_TYPE 
... this way we can just note that this MIGHT be a feed.

Other than that looks good. 

-- 

Please reply using PGP.

    http://peerfear.org/pubkey.asc    
    
    NewsMonster - http://www.newsmonster.org/
    
Kevin A. Burton, Location - San Francisco, CA, Cell - 415.595.9965
       AIM/YIM - sfburtonator,  Web - http://peerfear.org/
GPG fingerprint: 5FB2 F3E2 760E 70A8 6174 D393 E84D 8D04 99F1 4412
  IRC - freenode.net #infoanarchy | #p2p-hackers | #newsmonster


---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org

Re: [feedparser] Attaching patch again

Posted by Brad Neuberg <br...@yahoo.com>.

Hi Kevin.  Did you rerun the unit tests after that
change to make sure they work?

Brad

--- "Kevin A. Burton" <bu...@newsmonster.org> wrote:

> Brad Neuberg wrote:
> 
> > It looks like attachments are stripped out of the
> mailing list; here 
> > is the patch in unified diff format:
> >
> I rewrote getFeedPath
> 
> 1.  it was using duplicate indexOf queries for the
> same value.  Wasted 
> CPU :-/  Also I used lastIndexOf to start scan from
> the end of the string.
> 
> 2.  It stripped text after # or ? but not both... so
> 
> http://foo.com?cat=dog#here
> 
> would not work...
> 
> Here is the new method body:
> 
> >    protected static String getFeedPath( String
> resource ) 
> >
> >        throws MalformedURLException {
> >
> >        
> >
> >        // strip off any query string or anchors
> >
> >        int end = resource.lastIndexOf( "#" );
> >
> >        
> >
> >        if ( end != -1 )
> >
> >            resource = resource.substring( 0, end
> );
> >
> >        end = resource.lastIndexOf( "?" );
> >
> >        if ( end != -1 )
> >
> >            resource = resource.substring( 0, end
> );
> >
> >        if ( ! resource.endsWith( "blosxom.cgi" ) )
> {
> >
> >            Matcher fileMatcher =
> patternToStrip.matcher(resource);
> >
> >            resource = fileMatcher.replaceAll("");
> >
> >        }
> >
> >        
> >
> >        if ( ! resource.endsWith( "/" ) ) {
> >
> >            resource = resource + "/";
> >
> >        }
> >
> >        
> >
> >        return resource;
> >
> >    }
> >
> 
> 
> Kevin
> 
> -- 
> 
> Please reply using PGP.
> 
>     http://peerfear.org/pubkey.asc    
>     
>     NewsMonster - http://www.newsmonster.org/
>     
> Kevin A. Burton, Location - San Francisco, CA, Cell
> - 415.595.9965
>        AIM/YIM - sfburtonator,  Web -
> http://peerfear.org/
> GPG fingerprint: 5FB2 F3E2 760E 70A8 6174 D393 E84D
> 8D04 99F1 4412
>   IRC - freenode.net #infoanarchy | #p2p-hackers |
> #newsmonster
> 
> 
>
---------------------------------------------------------------------
> To unsubscribe, e-mail:
> commons-dev-unsubscribe@jakarta.apache.org
> For additional commands, e-mail:
> commons-dev-help@jakarta.apache.org
> 
> 


---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org

Re: [feedparser] Attaching patch again

Posted by "Kevin A. Burton" <bu...@newsmonster.org>.

Brad Neuberg wrote:

> It looks like attachments are stripped out of the mailing list; here 
> is the patch in unified diff format:
>
I rewrote getFeedPath

1.  It was making duplicate indexOf calls for the same value.  Wasted 
CPU :-/  Also, I used lastIndexOf to start the scan from the end of the string.

2.  It stripped text after # or ? but not both... so

http://foo.com?cat=dog#here

would not work...

Here is the new method body:

>    protected static String getFeedPath( String resource ) 
>
>        throws MalformedURLException {
>
>        
>
>        // strip off any query string or anchors
>
>        int end = resource.lastIndexOf( "#" );
>
>        
>
>        if ( end != -1 )
>
>            resource = resource.substring( 0, end );
>
>        end = resource.lastIndexOf( "?" );
>
>        if ( end != -1 )
>
>            resource = resource.substring( 0, end );
>
>        if ( ! resource.endsWith( "blosxom.cgi" ) ) {
>
>            Matcher fileMatcher = patternToStrip.matcher(resource);
>
>            resource = fileMatcher.replaceAll("");
>
>        }
>
>        
>
>        if ( ! resource.endsWith( "/" ) ) {
>
>            resource = resource + "/";
>
>        }
>
>        
>
>        return resource;
>
>    }
>


Kevin

-- 

Please reply using PGP.

    http://peerfear.org/pubkey.asc    
    
    NewsMonster - http://www.newsmonster.org/
    
Kevin A. Burton, Location - San Francisco, CA, Cell - 415.595.9965
       AIM/YIM - sfburtonator,  Web - http://peerfear.org/
GPG fingerprint: 5FB2 F3E2 760E 70A8 6174 D393 E84D 8D04 99F1 4412
  IRC - freenode.net #infoanarchy | #p2p-hackers | #newsmonster


---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org

RE: [feedparser] Attaching patch again

Posted by "Noel J. Bergman" <no...@devtech.com>.

> It looks like attachments are stripped out of the mailing
> list; here is the patch in unified diff format

Yes, almost all MIME types are stripped, and .ZIPs are rejected outright.
It has become almost impossible to attach anything useful to a message on
the list.

You could create an attachment on a bugzilla bug.  I added FeedParser as a
component.

	--- Noel


---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org