You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by eh...@apache.org on 2007/02/16 04:01:11 UTC
svn commit: r508291 -
/lucene/solr/trunk/client/ruby/solrb/examples/marc/marc_importer.rb
Author: ehatcher
Date: Thu Feb 15 19:01:11 2007
New Revision: 508291
URL: http://svn.apache.org/viewvc?view=rev&rev=508291
Log:
Add year, call number, isbn, and filename to the documents. note that the filename_facet is not currently generalizable and specific to the data files i load, which are numbered but this should not be a problem
Modified:
lucene/solr/trunk/client/ruby/solrb/examples/marc/marc_importer.rb
Modified: lucene/solr/trunk/client/ruby/solrb/examples/marc/marc_importer.rb
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/client/ruby/solrb/examples/marc/marc_importer.rb?view=diff&rev=508291&r1=508290&r2=508291
==============================================================================
--- lucene/solr/trunk/client/ruby/solrb/examples/marc/marc_importer.rb (original)
+++ lucene/solr/trunk/client/ruby/solrb/examples/marc/marc_importer.rb Thu Feb 15 19:01:11 2007
@@ -16,6 +16,7 @@
solr_url = ENV["SOLR_URL"] || "http://localhost:8983/solr"
marc_filename = ARGV[0]
+file_number = marc_filename.scan(/\d\d/)
debug = ARGV[1] == "-debug"
$KCODE = 'UTF8'
@@ -31,18 +32,24 @@
:subject_era_facet => ['650d', '650y', '651y', '655y'],
:subject_topic_facet => ['650a', '650b', '650x'],
:subject_geographic_facet => ['650c', '650z', '651a', '651x', '651z', '655z'],
+ :year_facet => Proc.new do |r|
+ extract_record_data(r,'260c').collect {|f| f.scan(/\d\d\d\d/)}.flatten
+ end,
:title_text => '245a',
:author_text => '100a',
-# :call_number => '050a',
+ :call_number_text => '050a',
+ :isbn_text => '010a',
+ :filename_facet => Proc.new {|r| file_number},
}
connection = Solr::Connection.new(solr_url)
if marc_filename =~ /.gz$/
puts "Unzipping data file..."
- system("cp #{marc_filename} /tmp/marc_data.mrc.gz")
- system("gunzip /tmp/marc_data.mrc.gz")
- marc_filename = "/tmp/marc_data.mrc"
+ temp_filename = "/tmp/marc_data_#{file_number}.mrc"
+ system("cp #{marc_filename} #{temp_filename}.gz")
+ system("gunzip #{temp_filename}")
+ marc_filename = temp_filename
end
reader = MARC::Reader.new(marc_filename)
@@ -69,7 +76,7 @@
extracted_data.compact.uniq
end
-puts "Indexing..."
+puts "Indexing #{marc_filename}..."
for record in reader
doc = {}
mapping.each do |key,value|