You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by eh...@apache.org on 2007/04/26 04:54:54 UTC

svn commit: r532580 - in /lucene/solr/trunk/client/ruby/solr-ruby: lib/solr/indexer.rb test/unit/indexer_test.rb

Author: ehatcher
Date: Wed Apr 25 19:54:53 2007
New Revision: 532580

URL: http://svn.apache.org/viewvc?view=rev&rev=532580
Log:
Add buffering to allow indexer to add multiple documents at a time, determined by the :buffer_docs parameter.  If not specified, individual documents are added to Solr.

Added:
    lucene/solr/trunk/client/ruby/solr-ruby/test/unit/indexer_test.rb   (with props)
Modified:
    lucene/solr/trunk/client/ruby/solr-ruby/lib/solr/indexer.rb

Modified: lucene/solr/trunk/client/ruby/solr-ruby/lib/solr/indexer.rb
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/client/ruby/solr-ruby/lib/solr/indexer.rb?view=diff&rev=532580&r1=532579&r2=532580
==============================================================================
--- lucene/solr/trunk/client/ruby/solr-ruby/lib/solr/indexer.rb (original)
+++ lucene/solr/trunk/client/ruby/solr-ruby/lib/solr/indexer.rb Wed Apr 25 19:54:53 2007
@@ -11,19 +11,44 @@
 # limitations under the License.
 
 class Solr::Indexer
+  # Deprecated: use Solr::Indexer.new(data_source, mapping, options).index instead.
   def self.index(data_source, mapper_or_mapping, options={})
-    mapper = mapper_or_mapping.is_a?(Hash) ? Solr::Importer::Mapper.new(mapper_or_mapping) : mapper_or_mapping
+    indexer = Solr::Indexer.new(data_source, mapper_or_mapping, options) # pass the caller's options through unchanged
+    indexer.index { |record, document| yield(record, document) if block_given? } # forward the caller's block, if any
+  end
+  
+  def initialize(data_source, mapper_or_mapping, options={})
     solr_url = options[:solr_url] || ENV["SOLR_URL"] || "http://localhost:8983/solr"
-    
-    solr = Solr::Connection.new(solr_url, options) #TODO - these options contain the solr_url and debug keys also, so tidy up what gets passed
-    data_source.each do |record|
-      document = mapper.map(record)
+    @solr = Solr::Connection.new(solr_url, options) # TODO: options may also carry :solr_url, :debug and :buffer_docs; tidy up what gets passed
+
+    @data_source = data_source
+    @mapper = mapper_or_mapping.is_a?(Hash) ? Solr::Importer::Mapper.new(mapper_or_mapping) : mapper_or_mapping # a Hash is wrapped in a Mapper; anything else is used as-is
+
+    @buffer_docs = options[:buffer_docs] # batch size for #index; nil means each document is added on its own
+    @debug = options[:debug] # when set, batches are printed instead of sent and no commit is issued
+  end
+
+  def index # maps each record, yields (record, document) to an optional block, and adds documents in batches
+    buffer = []
+    @data_source.each do |record|
+      document = @mapper.map(record)
       
       yield(record, document) if block_given?
       
-      solr.add(document) unless options[:debug]
-      puts document.inspect if options[:debug]
+      buffer << document
+      
+      if !@buffer_docs || buffer.size == @buffer_docs
+        add_docs(buffer)
+        buffer = [] # fresh array rather than clear: add_docs may keep a reference to the flushed batch
+      end
     end
-    solr.commit unless options[:debug]
+    add_docs(buffer) unless buffer.empty? # flush the final, partially filled batch
+    
+    @solr.commit unless @debug
+  end
+  
+  def add_docs(documents) # documents: the array of mapped documents buffered by #index
+    @solr.add(documents) unless @debug
+    puts documents.inspect if @debug # debug mode prints the batch instead of sending it
   end
-end
\ No newline at end of file
+end

Added: lucene/solr/trunk/client/ruby/solr-ruby/test/unit/indexer_test.rb
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/client/ruby/solr-ruby/test/unit/indexer_test.rb?view=auto&rev=532580
==============================================================================
--- lucene/solr/trunk/client/ruby/solr-ruby/test/unit/indexer_test.rb (added)
+++ lucene/solr/trunk/client/ruby/solr-ruby/test/unit/indexer_test.rb Wed Apr 25 19:54:53 2007
@@ -0,0 +1,57 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+require 'test/unit'
+require 'solr'
+
+class Solr::Indexer # reopened for the tests: add_docs records its argument instead of calling @solr.add
+  attr_reader :added
+  def add_docs(doc)
+    @added ||= [] # created lazily on the first call
+    @added << doc
+  end
+end
+
+class IndexerTest < Test::Unit::TestCase
+  def test_mapping_or_mapping # the indexer accepts either a mapping Hash or a Mapper instance
+    mapping = {:field => "foo"}
+    indexer = Solr::Indexer.new([1,2,3], mapping, :debug => true)
+    indexer.index
+    assert_equal 3, indexer.added.size # no :buffer_docs, so one add_docs call per record
+    
+    indexer = Solr::Indexer.new([1,2,3,4], Solr::Importer::Mapper.new(mapping), :debug => true)
+    indexer.index
+    assert_equal 4, indexer.added.size
+  end
+
+  def test_batch
+    mapping = {:field => "foo"}
+    indexer = Solr::Indexer.new([1,2,3], mapping, :debug => true, :buffer_docs => 2)
+    indexer.index
+    assert_equal 2, indexer.added.size # one full batch of 2, then a final flush of the remaining 1
+  end
+  
+end
+
+
+# source = DataSource.new
+# 
+# mapping = {
+#   :id => :isbn,
+#   :name => :author,
+#   :source => "BOOKS",
+#   :year => Proc.new {|record| record.date[0,4] },
+# }
+# 
+# Solr::Indexer.index(source, mapping) do |orig_data, solr_document|
+#   solr_document[:timestamp] = Time.now
+# end
\ No newline at end of file

Propchange: lucene/solr/trunk/client/ruby/solr-ruby/test/unit/indexer_test.rb
------------------------------------------------------------------------------
    svn:executable = *