You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lenya.apache.org by mi...@apache.org on 2005/04/27 17:54:15 UTC
svn commit: r164999 - in
/lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search:
crawler-live.xconf robots.txt
Author: michi
Date: Wed Apr 27 08:54:15 2005
New Revision: 164999
URL: http://svn.apache.org/viewcvs?rev=164999&view=rev
Log:
sample files for crawling added
Added:
lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/crawler-live.xconf
lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/robots.txt
Added: lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/crawler-live.xconf
URL: http://svn.apache.org/viewcvs/lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/crawler-live.xconf?rev=164999&view=auto
==============================================================================
--- lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/crawler-live.xconf (added)
+++ lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/crawler-live.xconf Wed Apr 27 08:54:15 2005
@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<!--
+ Copyright 1999-2004 The Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<crawler> <!-- Crawler settings for the "live" area of the default publication, as the URLs below indicate. -->
+ <user-agent>lenya</user-agent>
+ <!-- NOTE(review): base-url is presumably the crawl start page and scope-url the URL prefix the crawl stays within; confirm against the crawler code. -->
+ <base-url href="http://127.0.0.1:8888/default/live/index.html"/>
+ <scope-url href="http://127.0.0.1:8888/default/live/"/>
+ <!-- NOTE(review): relative paths, presumably the output locations for the crawled URI list and the dumped pages; confirm what they are resolved against. -->
+ <uri-list src="../../work/search/lucene/uris.txt"/>
+ <htdocs-dump-dir src="../../work/search/lucene/htdocs_dump/live"/>
+ <!-- Robots exclusion file for host 127.0.0.1; the robots.txt added next to this file has a "User-agent: lenya" record matching the user-agent above. -->
+ <robots src="robots.txt" domain="127.0.0.1"/>
+</crawler>
Added: lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/robots.txt
URL: http://svn.apache.org/viewcvs/lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/robots.txt?rev=164999&view=auto
==============================================================================
--- lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/robots.txt (added)
+++ lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/robots.txt Wed Apr 27 08:54:15 2005
@@ -0,0 +1,9 @@
+# robots.txt for 127.0.0.1 (referenced by the <robots> element in crawler-live.xconf)
+
+User-agent: *
+Disallow: /there_seems_to_be_a_bug_within_websphinx_Robot_Exclusion.html
+#Disallow:
+
+User-agent: lenya
+Disallow: /foo/bar.html
+#Disallow: /foo/bar/
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@lenya.apache.org
For additional commands, e-mail: commits-help@lenya.apache.org