You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by og...@apache.org on 2011/09/20 18:49:01 UTC

svn commit: r1173246 - /incubator/stanbol/trunk/entityhub/indexing/dbpedia/fetch_prepare.sh

Author: ogrisel
Date: Tue Sep 20 16:49:00 2011
New Revision: 1173246

URL: http://svn.apache.org/viewvc?rev=1173246&view=rev
Log:
STANBOL-323: clean images_en.nt file manually before importing

Modified:
    incubator/stanbol/trunk/entityhub/indexing/dbpedia/fetch_prepare.sh

Modified: incubator/stanbol/trunk/entityhub/indexing/dbpedia/fetch_prepare.sh
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/dbpedia/fetch_prepare.sh?rev=1173246&r1=1173245&r2=1173246&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/dbpedia/fetch_prepare.sh (original)
+++ incubator/stanbol/trunk/entityhub/indexing/dbpedia/fetch_prepare.sh Tue Sep 20 16:49:00 2011
@@ -45,9 +45,16 @@ wget -c $DBPEDIA/ru/labels_ru.nt.bz2
 wget -c $DBPEDIA/tr/labels_tr.nt.bz2
 wget -c $DBPEDIA/zh/labels_zh.nt.bz2
 wget -c $DBPEDIA/en/short_abstracts_en.nt.bz2
-wget -c $DBPEDIA/en/images_en.nt.bz2
 #wget -c $DBPEDIA/en/long_abstracts_en.not.bz2
 
+# special handling of the image file that has 5 corrupted entries
+if [ ! -f images_en.nt ]
+then
+    wget -c $DBPEDIA/en/images_en.nt.bz2
+    bzcat images_en.nt.bz2 | grep -v '\\' > images_en.nt
+    rm -f images_en.nt.bz2
+fi
+
 # Type specific attributes
 wget -c $DBPEDIA/en/geo_coordinates_en.nt.bz2
 wget -c $DBPEDIA/en/persondata_en.nt.bz2