You are viewing a plain-text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2016/01/22 22:28:21 UTC

svn commit: r1726318 - /nutch/trunk/conf/regex-normalize.xml.template

Author: snagel
Date: Fri Jan 22 21:28:21 2016
New Revision: 1726318

URL: http://svn.apache.org/viewvc?rev=1726318&view=rev
Log:
NUTCH-2204 : revert erroneous commit

Modified:
    nutch/trunk/conf/regex-normalize.xml.template

Modified: nutch/trunk/conf/regex-normalize.xml.template
URL: http://svn.apache.org/viewvc/nutch/trunk/conf/regex-normalize.xml.template?rev=1726318&r1=1726317&r2=1726318&view=diff
==============================================================================
--- nutch/trunk/conf/regex-normalize.xml.template (original)
+++ nutch/trunk/conf/regex-normalize.xml.template Fri Jan 22 21:28:21 2016
@@ -39,12 +39,11 @@
   <substitution>/$3</substitution>
 </regex> -->
 
-<!-- removes interpage href anchors such as site.com#location
-     (also done by urlnormalizer-basic
+<!-- removes interpage href anchors such as site.com#location -->
 <regex>
-  <pattern>#.*</pattern>
+  <pattern>#.*?(\?|&amp;|$)</pattern>
   <substitution>$1</substitution>
-</regex> -->
+</regex>
 
 <!-- cleans ?&amp;var=value into ?var=value -->
 <regex>