You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2013/12/10 03:10:19 UTC

[lucy-commits] [1/3] git commit: refs/heads/master - Make Reuters extractor less fussy about cwd.

Updated Branches:
  refs/heads/master 344a875a2 -> 545e3bbbe


Make Reuters extractor less fussy about cwd.


Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/e097e83f
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/e097e83f
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/e097e83f

Branch: refs/heads/master
Commit: e097e83f26bb2a68359e7c345ef68dccd5830b8f
Parents: 344a875
Author: Marvin Humphrey <ma...@rectangular.com>
Authored: Thu Dec 5 20:16:16 2013 -0800
Committer: Marvin Humphrey <ma...@rectangular.com>
Committed: Thu Dec 5 20:16:16 2013 -0800

----------------------------------------------------------------------
 devel/benchmarks/README.txt          | 4 +---
 devel/benchmarks/extract_reuters.plx | 7 +++----
 2 files changed, 4 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/e097e83f/devel/benchmarks/README.txt
----------------------------------------------------------------------
diff --git a/devel/benchmarks/README.txt b/devel/benchmarks/README.txt
index 9746350..308102f 100755
--- a/devel/benchmarks/README.txt
+++ b/devel/benchmarks/README.txt
@@ -15,9 +15,7 @@ of the un-tarred Reuters collection.
 
     ./extract_reuters.plx /path/to/reuters_collection
 
-Filepaths are hard-coded, and the assumption is that the apps will be run from
-within the benchmarks/ directory.  Each of the indexing apps takes four
-optional command line arguments: 
+Each of the indexing apps takes four optional command line arguments: 
 
   * The number of documents to index.
   * The number of times to repeat the indexing process.

http://git-wip-us.apache.org/repos/asf/lucy/blob/e097e83f/devel/benchmarks/extract_reuters.plx
----------------------------------------------------------------------
diff --git a/devel/benchmarks/extract_reuters.plx b/devel/benchmarks/extract_reuters.plx
index 744afae..4ef345c 100755
--- a/devel/benchmarks/extract_reuters.plx
+++ b/devel/benchmarks/extract_reuters.plx
@@ -20,14 +20,12 @@ use warnings;
 
 use File::Spec::Functions qw( catfile catdir );
 use Cwd qw( getcwd );
+use Fcntl;
 
 # Ensure call from correct location and with required arg.
 my $source_dir = $ARGV[0];
 die "Usage: ./extract_reuters.plx /path/to/expanded/archive"
     unless -d $source_dir;
-my $working_dir = getcwd;
-die "Must be run from the benchmarks/ directory"
-    unless ( $working_dir =~ /benchmarks\W*$/ );
 
 # Create the main output directory.
 my $main_out_dir = 'extracted_corpus';
@@ -95,7 +93,8 @@ for my $sgm_file (@sgm_files) {
             if ( length $title and length $body ) {
                 my $out_filename = sprintf( "article%05d.txt", $num_files );
                 my $out_filepath = catfile( $out_dir, $out_filename );
-                open( my $out_fh, '>', $out_filepath )
+                sysopen( my $out_fh, $out_filepath,
+                    O_CREAT | O_EXCL | O_WRONLY )
                     or die "Couldn't open '$out_filepath' for writing: $!";
                 $title =~ s/^\s*//;
                 $title =~ s/\s*$//;


[lucy-commits] [2/3] git commit: refs/heads/master - Add staging dirs to @INC.

Posted by ma...@apache.org.
Add staging dirs to @INC.

Add the `blib` directories for both Clownfish and Perl XS builds for the
benchmark indexing script.


Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/9ccb11c2
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/9ccb11c2
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/9ccb11c2

Branch: refs/heads/master
Commit: 9ccb11c26381d04771ce9ccaf83332d87bf37b03
Parents: e097e83
Author: Marvin Humphrey <ma...@rectangular.com>
Authored: Thu Dec 5 20:43:24 2013 -0800
Committer: Marvin Humphrey <ma...@rectangular.com>
Committed: Thu Dec 5 20:48:13 2013 -0800

----------------------------------------------------------------------
 devel/benchmarks/indexers/lucy_indexer.plx | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/9ccb11c2/devel/benchmarks/indexers/lucy_indexer.plx
----------------------------------------------------------------------
diff --git a/devel/benchmarks/indexers/lucy_indexer.plx b/devel/benchmarks/indexers/lucy_indexer.plx
index 8ec1f0a..a309e65 100755
--- a/devel/benchmarks/indexers/lucy_indexer.plx
+++ b/devel/benchmarks/indexers/lucy_indexer.plx
@@ -18,8 +18,16 @@
 use strict;
 use warnings;
 
-use lib '../devel/benchmarks/indexers';
-use lib 'devel/benchmarks/indexers';
+use FindBin qw( $Bin );
+use lib $Bin;
+use lib "$Bin/../../clownfish/runtime/perl/blib/arch";
+use lib "$Bin/../../clownfish/runtime/perl/blib/lib";
+use lib "$Bin/../../../clownfish/runtime/perl/blib/arch";
+use lib "$Bin/../../../clownfish/runtime/perl/blib/lib";
+use lib "$Bin/../../../blib/arch";
+use lib "$Bin/../../../blib/lib";
+use lib "$Bin/../../../perl/blib/arch";
+use lib "$Bin/../../../perl/blib/lib";
 
 use Getopt::Long;
 use Cwd qw( getcwd );


[lucy-commits] [3/3] git commit: refs/heads/master - Add specified blib directory to lucy_indexer subprocesses

Posted by ma...@apache.org.
Add specified blib directory to lucy_indexer subprocesses


Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/545e3bbb
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/545e3bbb
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/545e3bbb

Branch: refs/heads/master
Commit: 545e3bbbed93b79d1cbf07465afc163c0bbe4769
Parents: 9ccb11c
Author: Kurt Starsinic <ks...@gmail.com>
Authored: Mon Dec 9 17:18:07 2013 -0500
Committer: Kurt Starsinic <ks...@gmail.com>
Committed: Mon Dec 9 17:18:07 2013 -0500

----------------------------------------------------------------------
 devel/benchmarks/README.txt                | 2 +-
 devel/benchmarks/indexers/lucy_indexer.plx | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/545e3bbb/devel/benchmarks/README.txt
----------------------------------------------------------------------
diff --git a/devel/benchmarks/README.txt b/devel/benchmarks/README.txt
index 308102f..95a9dca 100755
--- a/devel/benchmarks/README.txt
+++ b/devel/benchmarks/README.txt
@@ -22,7 +22,7 @@ Each of the indexing apps takes four optional command line arguments:
   * The increment, or number of docs to add during each index writer instance.
   * Whether or not the main text should be stored and highlightable.
 
-    $ perl -Mblib indexers/lucy_indexer.plx \
+    $ perl -Mblib=../../perl indexers/lucy_indexer.plx \
     > --docs=1000 --reps=6 --increment=10 --store=1
 
     $ java -server -Xmx500M -XX:CompileThreshold=100 LuceneIndexer \

http://git-wip-us.apache.org/repos/asf/lucy/blob/545e3bbb/devel/benchmarks/indexers/lucy_indexer.plx
----------------------------------------------------------------------
diff --git a/devel/benchmarks/indexers/lucy_indexer.plx b/devel/benchmarks/indexers/lucy_indexer.plx
index a309e65..bdcc4c0 100755
--- a/devel/benchmarks/indexers/lucy_indexer.plx
+++ b/devel/benchmarks/indexers/lucy_indexer.plx
@@ -67,7 +67,7 @@ else {
         for (@INC) {
             next unless /\bblib\b/;
             # Propagate -Mblib to the child.
-            $command .= "-Mblib ";
+            $command .= "-Mblib=$_ ";
             last;
         }
         $command .= "$0 --build_index=1 ";