You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lenya.apache.org by mi...@apache.org on 2005/04/27 17:54:15 UTC

svn commit: r164999 - in /lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search: crawler-live.xconf robots.txt

Author: michi
Date: Wed Apr 27 08:54:15 2005
New Revision: 164999

URL: http://svn.apache.org/viewcvs?rev=164999&view=rev
Log:
sample files for crawling added

Added:
    lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/crawler-live.xconf
    lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/robots.txt

Added: lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/crawler-live.xconf
URL: http://svn.apache.org/viewcvs/lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/crawler-live.xconf?rev=164999&view=auto
==============================================================================
--- lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/crawler-live.xconf (added)
+++ lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/crawler-live.xconf Wed Apr 27 08:54:15 2005
@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<!--
+  Copyright 1999-2004 The Apache Software Foundation
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!-- Crawler settings for the live area of the default publication (see the URLs below). -->
+<crawler>
+  <user-agent>lenya</user-agent>
+  <!-- NOTE(review): presumably the start page and the URL prefix the crawl stays within; confirm against the crawler code. -->
+  <base-url href="http://127.0.0.1:8888/default/live/index.html"/>
+  <scope-url href="http://127.0.0.1:8888/default/live/"/>
+  <!-- NOTE(review): relative paths; presumably where the URI list and dumped pages are written. Confirm what they resolve against. -->
+  <uri-list src="../../work/search/lucene/uris.txt"/>
+  <htdocs-dump-dir src="../../work/search/lucene/htdocs_dump/live"/>
+  <!-- robots.txt sits next to this file and has a "User-agent: lenya" record matching the user-agent above. -->
+  <robots src="robots.txt" domain="127.0.0.1"/>
+</crawler>

Added: lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/robots.txt
URL: http://svn.apache.org/viewcvs/lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/robots.txt?rev=164999&view=auto
==============================================================================
--- lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/robots.txt (added)
+++ lenya/branches/BRANCH_1_2_X/src/webapp/lenya/pubs/default/config/search/robots.txt Wed Apr 27 08:54:15 2005
@@ -0,0 +1,9 @@
+# 127.0.0.1 (same host as the domain attribute of <robots> in crawler-live.xconf)
+# NOTE(review): the long path below looks like a placeholder for a suspected WebSphinx robot-exclusion bug; confirm before using an empty Disallow.
+User-agent: *
+Disallow: /there_seems_to_be_a_bug_within_websphinx_Robot_Exclusion.html
+#Disallow:
+
+User-agent: lenya
+Disallow: /foo/bar.html
+#Disallow: /foo/bar/



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@lenya.apache.org
For additional commands, e-mail: commits-help@lenya.apache.org