Posted to commits@accumulo.apache.org by uj...@apache.org on 2014/03/19 17:08:11 UTC

[01/50] [abbrv] git commit: ACCUMULO-375 added compression and increased the minimum split size

Repository: accumulo-wikisearch
Updated Branches:
  refs/heads/master f11759eb5 -> 1990979f6


ACCUMULO-375 added compression and increased the minimum split size

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1241940 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/57bf9cf3
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/57bf9cf3
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/57bf9cf3

Branch: refs/heads/master
Commit: 57bf9cf34b891c12d4361e8d5224a2aa38b63a6c
Parents: 266455b
Author: Adam Fuchs <af...@apache.org>
Authored: Wed Feb 8 15:37:04 2012 +0000
Committer: Adam Fuchs <af...@apache.org>
Committed: Wed Feb 8 15:37:04 2012 +0000

----------------------------------------------------------------------
 .../examples/wikisearch/ingest/WikipediaPartitionedIngester.java | 4 ++++
 1 file changed, 4 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/57bf9cf3/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
index 43f5e29..5571290 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
@@ -50,6 +50,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
@@ -178,6 +179,8 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
     partitionerJob.setOutputFormatClass(SequenceFileOutputFormat.class);
     Path outputDir = WikipediaConfiguration.getPartitionedArticlesPath(partitionerConf);
     SequenceFileOutputFormat.setOutputPath(partitionerJob, outputDir);
+    SequenceFileOutputFormat.setCompressOutput(partitionerJob, true);
+    SequenceFileOutputFormat.setOutputCompressionType(partitionerJob, CompressionType.RECORD);
     
     return partitionerJob.waitForCompletion(true) ? 0 : 1;
   }
@@ -209,6 +212,7 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
     // setup input format
     ingestJob.setInputFormatClass(SequenceFileInputFormat.class);
     SequenceFileInputFormat.setInputPaths(ingestJob, WikipediaConfiguration.getPartitionedArticlesPath(ingestConf));
+    SequenceFileInputFormat.setMinInputSplitSize(ingestJob, 1L << 28);
 
     // setup output format
     ingestJob.setMapOutputKeyClass(Text.class);
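
For readers unfamiliar with these knobs, here is a minimal, self-contained
sketch of the same two settings on a bare MapReduce job; the class name and
paths are illustrative, not from the commit. RECORD compression compresses
each value individually, while BLOCK would batch records for a better ratio at
some CPU cost; the minimum split size of 1L << 28 bytes (256 MB) caps how many
map tasks the partitioned articles fan out into.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.SequenceFile.CompressionType;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

    public class CompressionAndSplitSizeSketch {
      public static void main(String[] args) throws Exception {
        // Job.getInstance(conf, name) on newer Hadoop versions
        Job job = new Job(new Configuration(), "wikipedia-partitioner");
        Path partitioned = new Path("/tmp/partitionedArticles"); // illustrative

        // Writer side: compress the intermediate SequenceFiles, one record
        // at a time.
        SequenceFileOutputFormat.setOutputPath(job, partitioned);
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.RECORD);

        // Reader side: never split the input finer than 256 MB, so a
        // follow-on job launches fewer, larger map tasks.
        SequenceFileInputFormat.setInputPaths(job, partitioned);
        SequenceFileInputFormat.setMinInputSplitSize(job, 1L << 28);

        // ... set mapper/reducer and output key/value classes, then
        // job.waitForCompletion(true), as the real ingester does.
      }
    }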


[49/50] [abbrv] git commit: ACCUMULO-2446 Wikisearch now works with Accumulo 1.5.0 on both Hadoop 1.0.4 and 2.0.4-alpha.

Posted by uj...@apache.org.
ACCUMULO-2446 Wikisearch now works with Accumulo 1.5.0 on both Hadoop 1.0.4 and
2.0.4-alpha.

Added unit tests that are activated under either Hadoop profile. This was
necessary because the implementation of TaskAttemptContext changed between
Hadoop versions.

Several runtime dependencies were explicitly added to the ingest and query
projects so that the deployment instructions can be followed without extra
steps.
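
The reason the test had to be duplicated rather than shared: TaskAttemptContext
is a concrete class in Hadoop 1.x but became an interface in Hadoop 2.x, so the
one line that constructs it cannot compile against both at once. Below is a
sketch of the Hadoop 2 variant, with the Hadoop 1 equivalent in a comment;
ContextFactory is a hypothetical name, but the constructor calls match the
setUp() methods in the diffs that follow.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;
    import org.apache.hadoop.mapreduce.TaskAttemptID;
    import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

    public class ContextFactory {
      // Compiles against Hadoop 2.x only, where TaskAttemptContext is an
      // interface whose concrete class lives in the ...mapreduce.task package.
      static TaskAttemptContext newContext(Configuration conf) {
        return new TaskAttemptContextImpl(conf, new TaskAttemptID());
        // Hadoop 1.x equivalent, which does not compile on 2.x:
        //   return new TaskAttemptContext(conf, new TaskAttemptID());
      }
    }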


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/1d5c80be
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/1d5c80be
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/1d5c80be

Branch: refs/heads/master
Commit: 1d5c80bedc973aa081ed9a7570a08651a03eb76c
Parents: 0ef257a
Author: Bill Slacum <bi...@koverse.com>
Authored: Fri Feb 7 01:14:57 2014 -0500
Committer: Bill Slacum <bi...@koverse.com>
Committed: Tue Mar 18 23:34:21 2014 -0400

----------------------------------------------------------------------
 .gitignore                                      |   4 +
 README                                          |   6 +-
 README.parallel                                 |   2 +-
 accumulo-wikisearch.iml                         |  14 -
 ingest/bin/ingest.sh                            |   2 +-
 ingest/bin/ingest_parallel.sh                   |   4 +-
 ingest/pom.xml                                  |  70 ++-
 .../reader/AggregatingRecordReaderTest.java     | 288 +++++++++++
 .../reader/AggregatingRecordReaderTest.java     | 289 +++++++++++
 .../reader/AggregatingRecordReaderTest.java     | 288 -----------
 pom.xml                                         | 140 +++++-
 query/pom.xml                                   | 117 ++++-
 .../wikisearch/logic/TestQueryLogic.java        | 197 ++++++++
 .../wikisearch/logic/TestQueryLogic.java        | 477 +++++++++++++++++++
 .../wikisearch/logic/TestQueryLogic.java        | 197 --------
 15 files changed, 1558 insertions(+), 537 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1d5c80be/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0f31ce3
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+**/target
+.idea
+**/*.iml
+**/lib
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1d5c80be/README
----------------------------------------------------------------------
diff --git a/README b/README
index 041490f..ad28cdc 100644
--- a/README
+++ b/README
@@ -38,11 +38,11 @@
 	1. Copy the query/src/main/resources/META-INF/ejb-jar.xml.example file to 
 	   query/src/main/resources/META-INF/ejb-jar.xml. Modify to the file to contain the same 
 	   information that you put into the wikipedia.xml file from the Ingest step above. 
-	2. Re-build the query distribution by running 'mvn package assembly:single' in the top-level directory. 
+	2. Re-build the query distribution by running 'mvn package assembly:single' in the query module's directory.
         3. Untar the resulting file in the $JBOSS_HOME/server/default directory.
 
               $ cd $JBOSS_HOME/server/default
-              $ tar -xzf $ACCUMULO_HOME/src/examples/wikisearch/query/target/wikisearch-query*.tar.gz
+              $ tar -xzf /some/path/to/wikisearch/query/target/wikisearch-query*.tar.gz
  
            This will place the dependent jars in the lib directory and the EJB jar into the deploy directory.
 	4. Next, copy the wikisearch*.war file in the query-war/target directory to $JBOSS_HOME/server/default/deploy. 
@@ -51,11 +51,9 @@
 			setauths -u <user> -s all,enwiki,eswiki,frwiki,fawiki
 	7. Copy the following jars to the $ACCUMULO_HOME/lib/ext directory from the $JBOSS_HOME/server/default/lib directory:
 	
-		commons-lang*.jar
 		kryo*.jar
 		minlog*.jar
 		commons-jexl*.jar
-		google-collections*.jar
 		
 	8. Copy the $JBOSS_HOME/server/default/deploy/wikisearch-query*.jar to $ACCUMULO_HOME/lib/ext.
 

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1d5c80be/README.parallel
----------------------------------------------------------------------
diff --git a/README.parallel b/README.parallel
index 477556b..399f0f3 100644
--- a/README.parallel
+++ b/README.parallel
@@ -52,7 +52,7 @@
 		kryo*.jar
 		minlog*.jar
 		commons-jexl*.jar
-		google-collections*.jar
+		guava*.jar
 		
 	8. Copy the $JBOSS_HOME/server/default/deploy/wikisearch-query*.jar to $ACCUMULO_HOME/lib/ext.
 

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1d5c80be/accumulo-wikisearch.iml
----------------------------------------------------------------------
diff --git a/accumulo-wikisearch.iml b/accumulo-wikisearch.iml
deleted file mode 100644
index 8015fa7..0000000
--- a/accumulo-wikisearch.iml
+++ /dev/null
@@ -1,14 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
-  <component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_6" inherit-compiler-output="false">
-    <output url="file://$MODULE_DIR$/target/classes" />
-    <output-test url="file://$MODULE_DIR$/target/test-classes" />
-    <exclude-output />
-    <content url="file://$MODULE_DIR$">
-      <excludeFolder url="file://$MODULE_DIR$/target" />
-    </content>
-    <orderEntry type="inheritedJdk" />
-    <orderEntry type="sourceFolder" forTests="false" />
-  </component>
-</module>
-

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1d5c80be/ingest/bin/ingest.sh
----------------------------------------------------------------------
diff --git a/ingest/bin/ingest.sh b/ingest/bin/ingest.sh
index dbb9b05..73d582d 100755
--- a/ingest/bin/ingest.sh
+++ b/ingest/bin/ingest.sh
@@ -38,7 +38,7 @@ LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 #
 # Map/Reduce job
 #
-JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.5.0-SNAPSHOT.jar
+JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.5.0.jar
 CONF=$SCRIPT_DIR/../conf/wikipedia.xml
 HDFS_DATA_DIR=$1
 export HADOOP_CLASSPATH=$CLASSPATH

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1d5c80be/ingest/bin/ingest_parallel.sh
----------------------------------------------------------------------
diff --git a/ingest/bin/ingest_parallel.sh b/ingest/bin/ingest_parallel.sh
index 003b7f9..1619603 100755
--- a/ingest/bin/ingest_parallel.sh
+++ b/ingest/bin/ingest_parallel.sh
@@ -38,8 +38,8 @@ LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 #
 # Map/Reduce job
 #
-JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.5.0-SNAPSHOT.jar
-CONF=$SCRIPT_DIR/../conf/wikipedia.xml
+JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.5.0.jar
+CONF=$SCRIPT_DIR/../conf/wikipedia_parallel.xml
 HDFS_DATA_DIR=$1
 export HADOOP_CLASSPATH=$CLASSPATH
 echo "hadoop jar $JAR org.apache.accumulo.examples.wikisearch.ingest.WikipediaPartitionedIngester -libjars $LIBJARS -conf $CONF -Dwikipedia.input=${HDFS_DATA_DIR}"

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1d5c80be/ingest/pom.xml
----------------------------------------------------------------------
diff --git a/ingest/pom.xml b/ingest/pom.xml
index fdf08e9..a6f3d70 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="UTF-8"?>
-  <!--
+<!--
     Licensed to the Apache Software Foundation (ASF) under one or more
     contributor license agreements. See the NOTICE file distributed with
     this work for additional information regarding copyright ownership.
@@ -22,14 +22,16 @@
     <artifactId>accumulo-wikisearch</artifactId>
     <version>1.5.0</version>
   </parent>
-
   <artifactId>wikisearch-ingest</artifactId>
   <name>wikisearch-ingest</name>
-
   <dependencies>
     <dependency>
-      <groupId>com.google.collections</groupId>
-      <artifactId>google-collections</artifactId>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
     </dependency>
     <dependency>
       <groupId>com.google.protobuf</groupId>
@@ -52,10 +54,6 @@
       <artifactId>accumulo-core</artifactId>
     </dependency>
     <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-client</artifactId>
-    </dependency>
-    <dependency>
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-core</artifactId>
     </dependency>
@@ -64,12 +62,16 @@
       <artifactId>lucene-wikipedia</artifactId>
     </dependency>
     <dependency>
+      <groupId>org.apache.zookeeper</groupId>
+      <artifactId>zookeeper</artifactId>
+      <scope>runtime</scope>
+    </dependency>
+    <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
       <scope>test</scope>
     </dependency>
   </dependencies>
-
   <build>
     <plugins>
       <plugin>
@@ -81,11 +83,12 @@
             <goals>
               <goal>copy-dependencies</goal>
             </goals>
-            <phase>process-resources</phase>
+            <phase>prepare-package</phase>
             <configuration>
               <outputDirectory>lib</outputDirectory>
               <!-- just grab the non-provided runtime dependencies -->
-              <includeArtifactIds>commons-lang,google-collections,lucene-core,lucene-analyzers,lucene-wikipedia,protobuf-java,accumulo-core,hadoop-core,libthrift,cloudtrace,zookeeper,commons-codec</includeArtifactIds>
+              <!-- XXX we include guava at the same version as hadoop 2 provides so that we have it on hadoop 1 -->
+              <includeArtifactIds>commons-lang,guava,lucene-core,lucene-analyzers,lucene-wikipedia,protobuf-java,accumulo-core,hadoop-core,libthrift,zookeeper,commons-codec,accumulo-fate,accumulo-trace</includeArtifactIds>
               <excludeTransitive>false</excludeTransitive>
             </configuration>
           </execution>
@@ -102,4 +105,47 @@
       </plugin>
     </plugins>
   </build>
+  <profiles>
+    <!-- profile for building against Hadoop 1.0.x
+    Activate by not specifying hadoop.profile -->
+    <profile>
+      <id>hadoop-1.0</id>
+      <activation>
+        <property>
+          <name>!hadoop.profile</name>
+        </property>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-core</artifactId>
+          <version>${hadoop.version}</version>
+        </dependency>
+      </dependencies>
+    </profile>
+    <!-- profile for building against Hadoop 2.0.x
+    Activate using: mvn -Dhadoop.profile=2.0 -->
+    <profile>
+      <id>hadoop-2.0</id>
+      <activation>
+        <property>
+          <name>hadoop.profile</name>
+          <value>2.0</value>
+        </property>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client</artifactId>
+          <version>${hadoop.version}</version>
+          <exclusions>
+            <exclusion>
+              <groupId>org.codehaus.jackson</groupId>
+              <artifactId>jackson-mapper-asl</artifactId>
+            </exclusion>
+          </exclusions>
+        </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
 </project>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1d5c80be/ingest/src/test/hadoop1/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
----------------------------------------------------------------------
diff --git a/ingest/src/test/hadoop1/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java b/ingest/src/test/hadoop1/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
new file mode 100644
index 0000000..f79221d
--- /dev/null
+++ b/ingest/src/test/hadoop1/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
@@ -0,0 +1,288 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.wikisearch.reader;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.StringReader;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathExpression;
+import javax.xml.xpath.XPathFactory;
+
+import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.junit.Before;
+import org.junit.Test;
+import org.w3c.dom.Document;
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+public class AggregatingRecordReaderTest {
+  
+  public static class MyErrorHandler implements ErrorHandler {
+    
+    @Override
+    public void error(SAXParseException exception) throws SAXException {
+      // System.out.println(exception.getMessage());
+    }
+    
+    @Override
+    public void fatalError(SAXParseException exception) throws SAXException {
+      // System.out.println(exception.getMessage());
+    }
+    
+    @Override
+    public void warning(SAXParseException exception) throws SAXException {
+      // System.out.println(exception.getMessage());
+    }
+    
+  }
+  
+  private static final String xml1 = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + "<doc>\n" + "  <a>A</a>\n" + "  <b>B</b>\n" + "</doc>\n"
+      + "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + "<doc>\n" + "  <a>C</a>\n" + "  <b>D</b>\n" + "</doc>\n" + "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+      + "<doc>\n" + "  <a>E</a>\n" + "  <b>F</b>\n" + "</doc>\n";
+  
+  private static final String xml2 = "  <b>B</b>\n" + "</doc>\n" + "<doc>\n" + "  <a>C</a>\n" + "  <b>D</b>\n" + "</doc>\n" + "<doc>\n" + "  <a>E</a>\n"
+      + "  <b>F</b>\n" + "</doc>\n";
+  
+  private static final String xml3 = "<doc>\n" + "  <a>A</a>\n" + "  <b>B</b>\n" + "</doc>\n" + "<doc>\n" + "  <a>C</a>\n" + "  <b>D</b>\n" + "</doc>\n"
+      + "<doc>\n" + "  <a>E</a>\n";
+  
+  private static final String xml4 = "<doc>" + "  <a>A</a>" + "  <b>B</b>" + "</doc>" + "<doc>" + "  <a>C</a>" + "  <b>D</b>" + "</doc>" + "<doc>"
+      + "  <a>E</a>" + "  <b>F</b>" + "</doc>";
+  
+  private static final String xml5 = "<doc attr=\"G\">" + "  <a>A</a>" + "  <b>B</b>" + "</doc>" + "<doc>" + "  <a>C</a>" + "  <b>D</b>" + "</doc>"
+      + "<doc attr=\"H\"/>" + "<doc>" + "  <a>E</a>" + "  <b>F</b>" + "</doc>" + "<doc attr=\"I\"/>";
+  
+  private Configuration conf = null;
+  private TaskAttemptContext ctx = null;
+  private static DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+  private XPathFactory xpFactory = XPathFactory.newInstance();
+  private XPathExpression EXPR_A = null;
+  private XPathExpression EXPR_B = null;
+  private XPathExpression EXPR_ATTR = null;
+  
+  @Before
+  public void setUp() throws Exception {
+    conf = new Configuration();
+    conf.set(AggregatingRecordReader.START_TOKEN, "<doc");
+    conf.set(AggregatingRecordReader.END_TOKEN, "</doc>");
+    conf.set(AggregatingRecordReader.RETURN_PARTIAL_MATCHES, Boolean.toString(true));
+    TaskAttemptID id = new TaskAttemptID();
+    ctx = new TaskAttemptContext(conf, id);
+    XPath xp = xpFactory.newXPath();
+    EXPR_A = xp.compile("/doc/a");
+    EXPR_B = xp.compile("/doc/b");
+    EXPR_ATTR = xp.compile("/doc/@attr");
+  }
+  
+  public File createFile(String data) throws Exception {
+    // Write out test file
+    File f = File.createTempFile("aggReaderTest", ".xml");
+    f.deleteOnExit();
+    FileWriter writer = new FileWriter(f);
+    writer.write(data);
+    writer.flush();
+    writer.close();
+    return f;
+  }
+  
+  private void testXML(Text xml, String aValue, String bValue, String attrValue) throws Exception {
+    StringReader reader = new StringReader(xml.toString());
+    InputSource source = new InputSource(reader);
+    
+    DocumentBuilder parser = factory.newDocumentBuilder();
+    parser.setErrorHandler(new MyErrorHandler());
+    Document root = parser.parse(source);
+    assertNotNull(root);
+    
+    reader = new StringReader(xml.toString());
+    source = new InputSource(reader);
+    assertEquals(EXPR_A.evaluate(source), aValue);
+    
+    reader = new StringReader(xml.toString());
+    source = new InputSource(reader);
+    assertEquals(EXPR_B.evaluate(source), bValue);
+    
+    reader = new StringReader(xml.toString());
+    source = new InputSource(reader);
+    assertEquals(EXPR_ATTR.evaluate(source), attrValue);
+  }
+  
+  @Test
+  public void testIncorrectArgs() throws Exception {
+    File f = createFile(xml1);
+    
+    // Create FileSplit
+    Path p = new Path(f.toURI().toString());
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
+    AggregatingRecordReader reader = new AggregatingRecordReader();
+    try {
+      // Clear the values for START_TOKEN and END_TOKEN
+      conf.set(AggregatingRecordReader.START_TOKEN, null);
+      conf.set(AggregatingRecordReader.END_TOKEN, null);
+      reader.initialize(split, ctx);
+      // If we got here, then the code didn't throw an exception
+      fail();
+    } catch (Exception e) {
+      // Do nothing, we succeeded
+      f = null;
+    }
+    reader.close();
+  }
+  
+  @Test
+  public void testCorrectXML() throws Exception {
+    File f = createFile(xml1);
+    
+    // Create FileSplit
+    Path p = new Path(f.toURI().toString());
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
+    
+    // Initialize the RecordReader
+    AggregatingRecordReader reader = new AggregatingRecordReader();
+    reader.initialize(split, ctx);
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "A", "B", "");
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "C", "D", "");
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "E", "F", "");
+    assertTrue(!reader.nextKeyValue());
+    
+  }
+  
+  @Test
+  public void testPartialXML() throws Exception {
+    File f = createFile(xml2);
+    
+    // Create FileSplit
+    Path p = new Path(f.toURI().toString());
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
+    
+    // Initialize the RecordReader
+    AggregatingRecordReader reader = new AggregatingRecordReader();
+    reader.initialize(split, ctx);
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "C", "D", "");
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "E", "F", "");
+    assertTrue(!reader.nextKeyValue());
+  }
+  
+  public void testPartialXML2WithNoPartialRecordsReturned() throws Exception {
+    conf.set(AggregatingRecordReader.RETURN_PARTIAL_MATCHES, Boolean.toString(false));
+    File f = createFile(xml3);
+    
+    // Create FileSplit
+    Path p = new Path(f.toURI().toString());
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
+    
+    // Initialize the RecordReader
+    AggregatingRecordReader reader = new AggregatingRecordReader();
+    reader.initialize(split, ctx);
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "A", "B", "");
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "C", "D", "");
+    assertTrue(!reader.nextKeyValue());
+  }
+  
+  @Test
+  public void testPartialXML2() throws Exception {
+    File f = createFile(xml3);
+    
+    // Create FileSplit
+    Path p = new Path(f.toURI().toString());
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
+    
+    // Initialize the RecordReader
+    AggregatingRecordReader reader = new AggregatingRecordReader();
+    reader.initialize(split, ctx);
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "A", "B", "");
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "C", "D", "");
+    assertTrue(reader.nextKeyValue());
+    try {
+      testXML(reader.getCurrentValue(), "E", "", "");
+      fail("Fragment returned, and it somehow passed XML parsing.");
+    } catch (SAXParseException e) {
+      // ignore
+    }
+    assertTrue(!reader.nextKeyValue());
+  }
+  
+  @Test
+  public void testLineSplitting() throws Exception {
+    File f = createFile(xml4);
+    
+    // Create FileSplit
+    Path p = new Path(f.toURI().toString());
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
+    
+    // Initialize the RecordReader
+    AggregatingRecordReader reader = new AggregatingRecordReader();
+    reader.initialize(split, ctx);
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "A", "B", "");
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "C", "D", "");
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "E", "F", "");
+    assertTrue(!reader.nextKeyValue());
+  }
+  
+  @Test
+  public void testNoEndTokenHandling() throws Exception {
+    File f = createFile(xml5);
+    // Create FileSplit
+    Path p = new Path(f.toURI().toString());
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
+    
+    // Initialize the RecordReader
+    AggregatingRecordReader reader = new AggregatingRecordReader();
+    reader.initialize(split, ctx);
+    assertTrue("Not enough records returned.", reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "A", "B", "G");
+    assertTrue("Not enough records returned.", reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "C", "D", "");
+    assertTrue("Not enough records returned.", reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "", "", "H");
+    assertTrue("Not enough records returned.", reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "E", "F", "");
+    assertTrue("Not enough records returned.", reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "", "", "I");
+    assertTrue("Too many records returned.", !reader.nextKeyValue());
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1d5c80be/ingest/src/test/hadoop2/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
----------------------------------------------------------------------
diff --git a/ingest/src/test/hadoop2/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java b/ingest/src/test/hadoop2/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
new file mode 100644
index 0000000..d9443bc
--- /dev/null
+++ b/ingest/src/test/hadoop2/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
@@ -0,0 +1,289 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.wikisearch.reader;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.StringReader;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathExpression;
+import javax.xml.xpath.XPathFactory;
+
+import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.junit.Before;
+import org.junit.Test;
+import org.w3c.dom.Document;
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+public class AggregatingRecordReaderTest {
+  
+  public static class MyErrorHandler implements ErrorHandler {
+    
+    @Override
+    public void error(SAXParseException exception) throws SAXException {
+      // System.out.println(exception.getMessage());
+    }
+    
+    @Override
+    public void fatalError(SAXParseException exception) throws SAXException {
+      // System.out.println(exception.getMessage());
+    }
+    
+    @Override
+    public void warning(SAXParseException exception) throws SAXException {
+      // System.out.println(exception.getMessage());
+    }
+    
+  }
+  
+  private static final String xml1 = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + "<doc>\n" + "  <a>A</a>\n" + "  <b>B</b>\n" + "</doc>\n"
+      + "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + "<doc>\n" + "  <a>C</a>\n" + "  <b>D</b>\n" + "</doc>\n" + "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+      + "<doc>\n" + "  <a>E</a>\n" + "  <b>F</b>\n" + "</doc>\n";
+  
+  private static final String xml2 = "  <b>B</b>\n" + "</doc>\n" + "<doc>\n" + "  <a>C</a>\n" + "  <b>D</b>\n" + "</doc>\n" + "<doc>\n" + "  <a>E</a>\n"
+      + "  <b>F</b>\n" + "</doc>\n";
+  
+  private static final String xml3 = "<doc>\n" + "  <a>A</a>\n" + "  <b>B</b>\n" + "</doc>\n" + "<doc>\n" + "  <a>C</a>\n" + "  <b>D</b>\n" + "</doc>\n"
+      + "<doc>\n" + "  <a>E</a>\n";
+  
+  private static final String xml4 = "<doc>" + "  <a>A</a>" + "  <b>B</b>" + "</doc>" + "<doc>" + "  <a>C</a>" + "  <b>D</b>" + "</doc>" + "<doc>"
+      + "  <a>E</a>" + "  <b>F</b>" + "</doc>";
+  
+  private static final String xml5 = "<doc attr=\"G\">" + "  <a>A</a>" + "  <b>B</b>" + "</doc>" + "<doc>" + "  <a>C</a>" + "  <b>D</b>" + "</doc>"
+      + "<doc attr=\"H\"/>" + "<doc>" + "  <a>E</a>" + "  <b>F</b>" + "</doc>" + "<doc attr=\"I\"/>";
+  
+  private Configuration conf = null;
+  private TaskAttemptContext ctx = null;
+  private static DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+  private XPathFactory xpFactory = XPathFactory.newInstance();
+  private XPathExpression EXPR_A = null;
+  private XPathExpression EXPR_B = null;
+  private XPathExpression EXPR_ATTR = null;
+  
+  @Before
+  public void setUp() throws Exception {
+    conf = new Configuration();
+    conf.set(AggregatingRecordReader.START_TOKEN, "<doc");
+    conf.set(AggregatingRecordReader.END_TOKEN, "</doc>");
+    conf.set(AggregatingRecordReader.RETURN_PARTIAL_MATCHES, Boolean.toString(true));
+    TaskAttemptID id = new TaskAttemptID();
+    ctx = new TaskAttemptContextImpl(conf, id);
+    XPath xp = xpFactory.newXPath();
+    EXPR_A = xp.compile("/doc/a");
+    EXPR_B = xp.compile("/doc/b");
+    EXPR_ATTR = xp.compile("/doc/@attr");
+  }
+  
+  public File createFile(String data) throws Exception {
+    // Write out test file
+    File f = File.createTempFile("aggReaderTest", ".xml");
+    f.deleteOnExit();
+    FileWriter writer = new FileWriter(f);
+    writer.write(data);
+    writer.flush();
+    writer.close();
+    return f;
+  }
+  
+  private void testXML(Text xml, String aValue, String bValue, String attrValue) throws Exception {
+    StringReader reader = new StringReader(xml.toString());
+    InputSource source = new InputSource(reader);
+    
+    DocumentBuilder parser = factory.newDocumentBuilder();
+    parser.setErrorHandler(new MyErrorHandler());
+    Document root = parser.parse(source);
+    assertNotNull(root);
+    
+    reader = new StringReader(xml.toString());
+    source = new InputSource(reader);
+    assertEquals(EXPR_A.evaluate(source), aValue);
+    
+    reader = new StringReader(xml.toString());
+    source = new InputSource(reader);
+    assertEquals(EXPR_B.evaluate(source), bValue);
+    
+    reader = new StringReader(xml.toString());
+    source = new InputSource(reader);
+    assertEquals(EXPR_ATTR.evaluate(source), attrValue);
+  }
+  
+  @Test
+  public void testIncorrectArgs() throws Exception {
+    File f = createFile(xml1);
+    
+    // Create FileSplit
+    Path p = new Path(f.toURI().toString());
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
+    AggregatingRecordReader reader = new AggregatingRecordReader();
+    try {
+      // Clear the values for START_TOKEN and END_TOKEN
+      conf.set(AggregatingRecordReader.START_TOKEN, null);
+      conf.set(AggregatingRecordReader.END_TOKEN, null);
+      reader.initialize(split, ctx);
+      // If we got here, then the code didn't throw an exception
+      fail();
+    } catch (Exception e) {
+      // Do nothing, we succeeded
+      f = null;
+    }
+    reader.close();
+  }
+  
+  @Test
+  public void testCorrectXML() throws Exception {
+    File f = createFile(xml1);
+    
+    // Create FileSplit
+    Path p = new Path(f.toURI().toString());
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
+    
+    // Initialize the RecordReader
+    AggregatingRecordReader reader = new AggregatingRecordReader();
+    reader.initialize(split, ctx);
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "A", "B", "");
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "C", "D", "");
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "E", "F", "");
+    assertTrue(!reader.nextKeyValue());
+    
+  }
+  
+  @Test
+  public void testPartialXML() throws Exception {
+    File f = createFile(xml2);
+    
+    // Create FileSplit
+    Path p = new Path(f.toURI().toString());
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
+    
+    // Initialize the RecordReader
+    AggregatingRecordReader reader = new AggregatingRecordReader();
+    reader.initialize(split, ctx);
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "C", "D", "");
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "E", "F", "");
+    assertTrue(!reader.nextKeyValue());
+  }
+  
+  public void testPartialXML2WithNoPartialRecordsReturned() throws Exception {
+    conf.set(AggregatingRecordReader.RETURN_PARTIAL_MATCHES, Boolean.toString(false));
+    File f = createFile(xml3);
+    
+    // Create FileSplit
+    Path p = new Path(f.toURI().toString());
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
+    
+    // Initialize the RecordReader
+    AggregatingRecordReader reader = new AggregatingRecordReader();
+    reader.initialize(split, ctx);
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "A", "B", "");
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "C", "D", "");
+    assertTrue(!reader.nextKeyValue());
+  }
+  
+  @Test
+  public void testPartialXML2() throws Exception {
+    File f = createFile(xml3);
+    
+    // Create FileSplit
+    Path p = new Path(f.toURI().toString());
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
+    
+    // Initialize the RecordReader
+    AggregatingRecordReader reader = new AggregatingRecordReader();
+    reader.initialize(split, ctx);
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "A", "B", "");
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "C", "D", "");
+    assertTrue(reader.nextKeyValue());
+    try {
+      testXML(reader.getCurrentValue(), "E", "", "");
+      fail("Fragment returned, and it somehow passed XML parsing.");
+    } catch (SAXParseException e) {
+      // ignore
+    }
+    assertTrue(!reader.nextKeyValue());
+  }
+  
+  @Test
+  public void testLineSplitting() throws Exception {
+    File f = createFile(xml4);
+    
+    // Create FileSplit
+    Path p = new Path(f.toURI().toString());
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
+    
+    // Initialize the RecordReader
+    AggregatingRecordReader reader = new AggregatingRecordReader();
+    reader.initialize(split, ctx);
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "A", "B", "");
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "C", "D", "");
+    assertTrue(reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "E", "F", "");
+    assertTrue(!reader.nextKeyValue());
+  }
+  
+  @Test
+  public void testNoEndTokenHandling() throws Exception {
+    File f = createFile(xml5);
+    // Create FileSplit
+    Path p = new Path(f.toURI().toString());
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
+    
+    // Initialize the RecordReader
+    AggregatingRecordReader reader = new AggregatingRecordReader();
+    reader.initialize(split, ctx);
+    assertTrue("Not enough records returned.", reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "A", "B", "G");
+    assertTrue("Not enough records returned.", reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "C", "D", "");
+    assertTrue("Not enough records returned.", reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "", "", "H");
+    assertTrue("Not enough records returned.", reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "E", "F", "");
+    assertTrue("Not enough records returned.", reader.nextKeyValue());
+    testXML(reader.getCurrentValue(), "", "", "I");
+    assertTrue("Too many records returned.", !reader.nextKeyValue());
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1d5c80be/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
----------------------------------------------------------------------
diff --git a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
deleted file mode 100644
index f79221d..0000000
--- a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.reader;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.File;
-import java.io.FileWriter;
-import java.io.StringReader;
-
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.xpath.XPath;
-import javax.xml.xpath.XPathExpression;
-import javax.xml.xpath.XPathFactory;
-
-import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.junit.Before;
-import org.junit.Test;
-import org.w3c.dom.Document;
-import org.xml.sax.ErrorHandler;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-import org.xml.sax.SAXParseException;
-
-public class AggregatingRecordReaderTest {
-  
-  public static class MyErrorHandler implements ErrorHandler {
-    
-    @Override
-    public void error(SAXParseException exception) throws SAXException {
-      // System.out.println(exception.getMessage());
-    }
-    
-    @Override
-    public void fatalError(SAXParseException exception) throws SAXException {
-      // System.out.println(exception.getMessage());
-    }
-    
-    @Override
-    public void warning(SAXParseException exception) throws SAXException {
-      // System.out.println(exception.getMessage());
-    }
-    
-  }
-  
-  private static final String xml1 = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + "<doc>\n" + "  <a>A</a>\n" + "  <b>B</b>\n" + "</doc>\n"
-      + "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + "<doc>\n" + "  <a>C</a>\n" + "  <b>D</b>\n" + "</doc>\n" + "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
-      + "<doc>\n" + "  <a>E</a>\n" + "  <b>F</b>\n" + "</doc>\n";
-  
-  private static final String xml2 = "  <b>B</b>\n" + "</doc>\n" + "<doc>\n" + "  <a>C</a>\n" + "  <b>D</b>\n" + "</doc>\n" + "<doc>\n" + "  <a>E</a>\n"
-      + "  <b>F</b>\n" + "</doc>\n";
-  
-  private static final String xml3 = "<doc>\n" + "  <a>A</a>\n" + "  <b>B</b>\n" + "</doc>\n" + "<doc>\n" + "  <a>C</a>\n" + "  <b>D</b>\n" + "</doc>\n"
-      + "<doc>\n" + "  <a>E</a>\n";
-  
-  private static final String xml4 = "<doc>" + "  <a>A</a>" + "  <b>B</b>" + "</doc>" + "<doc>" + "  <a>C</a>" + "  <b>D</b>" + "</doc>" + "<doc>"
-      + "  <a>E</a>" + "  <b>F</b>" + "</doc>";
-  
-  private static final String xml5 = "<doc attr=\"G\">" + "  <a>A</a>" + "  <b>B</b>" + "</doc>" + "<doc>" + "  <a>C</a>" + "  <b>D</b>" + "</doc>"
-      + "<doc attr=\"H\"/>" + "<doc>" + "  <a>E</a>" + "  <b>F</b>" + "</doc>" + "<doc attr=\"I\"/>";
-  
-  private Configuration conf = null;
-  private TaskAttemptContext ctx = null;
-  private static DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
-  private XPathFactory xpFactory = XPathFactory.newInstance();
-  private XPathExpression EXPR_A = null;
-  private XPathExpression EXPR_B = null;
-  private XPathExpression EXPR_ATTR = null;
-  
-  @Before
-  public void setUp() throws Exception {
-    conf = new Configuration();
-    conf.set(AggregatingRecordReader.START_TOKEN, "<doc");
-    conf.set(AggregatingRecordReader.END_TOKEN, "</doc>");
-    conf.set(AggregatingRecordReader.RETURN_PARTIAL_MATCHES, Boolean.toString(true));
-    TaskAttemptID id = new TaskAttemptID();
-    ctx = new TaskAttemptContext(conf, id);
-    XPath xp = xpFactory.newXPath();
-    EXPR_A = xp.compile("/doc/a");
-    EXPR_B = xp.compile("/doc/b");
-    EXPR_ATTR = xp.compile("/doc/@attr");
-  }
-  
-  public File createFile(String data) throws Exception {
-    // Write out test file
-    File f = File.createTempFile("aggReaderTest", ".xml");
-    f.deleteOnExit();
-    FileWriter writer = new FileWriter(f);
-    writer.write(data);
-    writer.flush();
-    writer.close();
-    return f;
-  }
-  
-  private void testXML(Text xml, String aValue, String bValue, String attrValue) throws Exception {
-    StringReader reader = new StringReader(xml.toString());
-    InputSource source = new InputSource(reader);
-    
-    DocumentBuilder parser = factory.newDocumentBuilder();
-    parser.setErrorHandler(new MyErrorHandler());
-    Document root = parser.parse(source);
-    assertNotNull(root);
-    
-    reader = new StringReader(xml.toString());
-    source = new InputSource(reader);
-    assertEquals(EXPR_A.evaluate(source), aValue);
-    
-    reader = new StringReader(xml.toString());
-    source = new InputSource(reader);
-    assertEquals(EXPR_B.evaluate(source), bValue);
-    
-    reader = new StringReader(xml.toString());
-    source = new InputSource(reader);
-    assertEquals(EXPR_ATTR.evaluate(source), attrValue);
-  }
-  
-  @Test
-  public void testIncorrectArgs() throws Exception {
-    File f = createFile(xml1);
-    
-    // Create FileSplit
-    Path p = new Path(f.toURI().toString());
-    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
-    AggregatingRecordReader reader = new AggregatingRecordReader();
-    try {
-      // Clear the values for START_TOKEN and END_TOKEN
-      conf.set(AggregatingRecordReader.START_TOKEN, null);
-      conf.set(AggregatingRecordReader.END_TOKEN, null);
-      reader.initialize(split, ctx);
-      // If we got here, then the code didn't throw an exception
-      fail();
-    } catch (Exception e) {
-      // Do nothing, we succeeded
-      f = null;
-    }
-    reader.close();
-  }
-  
-  @Test
-  public void testCorrectXML() throws Exception {
-    File f = createFile(xml1);
-    
-    // Create FileSplit
-    Path p = new Path(f.toURI().toString());
-    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
-    
-    // Initialize the RecordReader
-    AggregatingRecordReader reader = new AggregatingRecordReader();
-    reader.initialize(split, ctx);
-    assertTrue(reader.nextKeyValue());
-    testXML(reader.getCurrentValue(), "A", "B", "");
-    assertTrue(reader.nextKeyValue());
-    testXML(reader.getCurrentValue(), "C", "D", "");
-    assertTrue(reader.nextKeyValue());
-    testXML(reader.getCurrentValue(), "E", "F", "");
-    assertTrue(!reader.nextKeyValue());
-    
-  }
-  
-  @Test
-  public void testPartialXML() throws Exception {
-    File f = createFile(xml2);
-    
-    // Create FileSplit
-    Path p = new Path(f.toURI().toString());
-    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
-    
-    // Initialize the RecordReader
-    AggregatingRecordReader reader = new AggregatingRecordReader();
-    reader.initialize(split, ctx);
-    assertTrue(reader.nextKeyValue());
-    testXML(reader.getCurrentValue(), "C", "D", "");
-    assertTrue(reader.nextKeyValue());
-    testXML(reader.getCurrentValue(), "E", "F", "");
-    assertTrue(!reader.nextKeyValue());
-  }
-  
-  public void testPartialXML2WithNoPartialRecordsReturned() throws Exception {
-    conf.set(AggregatingRecordReader.RETURN_PARTIAL_MATCHES, Boolean.toString(false));
-    File f = createFile(xml3);
-    
-    // Create FileSplit
-    Path p = new Path(f.toURI().toString());
-    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
-    
-    // Initialize the RecordReader
-    AggregatingRecordReader reader = new AggregatingRecordReader();
-    reader.initialize(split, ctx);
-    assertTrue(reader.nextKeyValue());
-    testXML(reader.getCurrentValue(), "A", "B", "");
-    assertTrue(reader.nextKeyValue());
-    testXML(reader.getCurrentValue(), "C", "D", "");
-    assertTrue(!reader.nextKeyValue());
-  }
-  
-  @Test
-  public void testPartialXML2() throws Exception {
-    File f = createFile(xml3);
-    
-    // Create FileSplit
-    Path p = new Path(f.toURI().toString());
-    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
-    
-    // Initialize the RecordReader
-    AggregatingRecordReader reader = new AggregatingRecordReader();
-    reader.initialize(split, ctx);
-    assertTrue(reader.nextKeyValue());
-    testXML(reader.getCurrentValue(), "A", "B", "");
-    assertTrue(reader.nextKeyValue());
-    testXML(reader.getCurrentValue(), "C", "D", "");
-    assertTrue(reader.nextKeyValue());
-    try {
-      testXML(reader.getCurrentValue(), "E", "", "");
-      fail("Fragment returned, and it somehow passed XML parsing.");
-    } catch (SAXParseException e) {
-      // ignore
-    }
-    assertTrue(!reader.nextKeyValue());
-  }
-  
-  @Test
-  public void testLineSplitting() throws Exception {
-    File f = createFile(xml4);
-    
-    // Create FileSplit
-    Path p = new Path(f.toURI().toString());
-    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
-    
-    // Initialize the RecordReader
-    AggregatingRecordReader reader = new AggregatingRecordReader();
-    reader.initialize(split, ctx);
-    assertTrue(reader.nextKeyValue());
-    testXML(reader.getCurrentValue(), "A", "B", "");
-    assertTrue(reader.nextKeyValue());
-    testXML(reader.getCurrentValue(), "C", "D", "");
-    assertTrue(reader.nextKeyValue());
-    testXML(reader.getCurrentValue(), "E", "F", "");
-    assertTrue(!reader.nextKeyValue());
-  }
-  
-  @Test
-  public void testNoEndTokenHandling() throws Exception {
-    File f = createFile(xml5);
-    // Create FileSplit
-    Path p = new Path(f.toURI().toString());
-    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
-    
-    // Initialize the RecordReader
-    AggregatingRecordReader reader = new AggregatingRecordReader();
-    reader.initialize(split, ctx);
-    assertTrue("Not enough records returned.", reader.nextKeyValue());
-    testXML(reader.getCurrentValue(), "A", "B", "G");
-    assertTrue("Not enough records returned.", reader.nextKeyValue());
-    testXML(reader.getCurrentValue(), "C", "D", "");
-    assertTrue("Not enough records returned.", reader.nextKeyValue());
-    testXML(reader.getCurrentValue(), "", "", "H");
-    assertTrue("Not enough records returned.", reader.nextKeyValue());
-    testXML(reader.getCurrentValue(), "E", "F", "");
-    assertTrue("Not enough records returned.", reader.nextKeyValue());
-    testXML(reader.getCurrentValue(), "", "", "I");
-    assertTrue("Too many records returned.", !reader.nextKeyValue());
-  }
-  
-}

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1d5c80be/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 919b08b..894a132 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,6 @@
   <artifactId>accumulo-wikisearch</artifactId>
   <packaging>pom</packaging>
   <name>accumulo-wikisearch</name>
-
   <modules>
     <module>ingest</module>
     <module>query</module>
@@ -38,10 +37,9 @@
     <version.commons-jexl>2.0.1</version.commons-jexl>
     <version.commons-lang>2.4</version.commons-lang>
     <version.ejb-spec-api>1.0.1.Final</version.ejb-spec-api>
-    <version.googlecollections>1.0</version.googlecollections>
+    <version.guava>11.0.2</version.guava>
     <version.jaxrs>2.1.0.GA</version.jaxrs>
     <version.kryo>1.04</version.kryo>
-    <version.libthrift>0.6.1</version.libthrift>
     <version.log4j>1.2.16</version.log4j>
     <version.log4j-extras>1.0</version.log4j-extras>
     <version.lucene>3.0.2</version.lucene>
@@ -49,14 +47,15 @@
     <version.lucene-wikipedia>3.0.2</version.lucene-wikipedia>
     <version.minlog>1.2</version.minlog>
     <version.protobuf>2.3.0</version.protobuf>
+    <version.thrift>0.9.0</version.thrift>
     <version.zookeeper>3.3.1</version.zookeeper>
   </properties>
   <dependencyManagement>
     <dependencies>
       <dependency>
-        <groupId>com.google.collections</groupId>
-        <artifactId>google-collections</artifactId>
-        <version>${version.googlecollections}</version>
+        <groupId>com.google.guava</groupId>
+        <artifactId>guava</artifactId>
+        <version>${version.guava}</version>
       </dependency>
       <dependency>
         <groupId>com.google.protobuf</groupId>
@@ -82,6 +81,12 @@
         <groupId>com.sun.jersey</groupId>
         <artifactId>jersey-server</artifactId>
         <version>1.11</version>
+        <exclusions>
+          <exclusion>
+            <groupId>asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+        </exclusions>
       </dependency>
       <dependency>
         <groupId>commons-codec</groupId>
@@ -93,6 +98,28 @@
         <artifactId>commons-lang</artifactId>
         <version>${version.commons-lang}</version>
       </dependency>
+      <!-- XXX This is just to fix the dependency conflict in Hadoop 1 -->
+      <dependency>
+        <groupId>net.java.dev.jets3t</groupId>
+        <artifactId>jets3t</artifactId>
+        <version>0.7.1</version>
+        <exclusions>
+          <exclusion>
+            <groupId>commons-httpclient</groupId>
+            <artifactId>commons-httpclient</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.accumulo</groupId>
+        <artifactId>accumulo-core</artifactId>
+        <version>${version.accumulo}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.accumulo</groupId>
+        <artifactId>accumulo-trace</artifactId>
+        <version>${version.accumulo}</version>
+      </dependency>
       <dependency>
         <groupId>org.apache.commons</groupId>
         <artifactId>commons-jexl</artifactId>
@@ -112,6 +139,24 @@
         <groupId>org.apache.lucene</groupId>
         <artifactId>lucene-wikipedia</artifactId>
         <version>${version.lucene-wikipedia}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>commons-digester</groupId>
+            <artifactId>commons-digester</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.thrift</groupId>
+        <artifactId>libthrift</artifactId>
+        <version>${version.thrift}</version>
+        <exclusions>
+            <!-- excluded to make the enforcer plugin happy -->
+            <exclusion>
+                <groupId>org.apache.httpcomponents</groupId>
+                <artifactId>httpcore</artifactId>
+            </exclusion>
+        </exclusions>
       </dependency>
     </dependencies>
   </dependencyManagement>
@@ -135,7 +180,6 @@
       <layout>default</layout>
     </repository>
   </repositories>
-
   <build>
     <defaultGoal>package</defaultGoal>
     <plugins>
@@ -148,6 +192,11 @@
             <goals>
               <goal>enforce</goal>
             </goals>
+            <configuration>
+              <rules>
+                <DependencyConvergence />
+              </rules>
+            </configuration>
           </execution>
         </executions>
       </plugin>
@@ -165,6 +214,10 @@
         </configuration>
       </plugin>
       <plugin>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <version>2.8</version>
+      </plugin>
+      <plugin>
         <artifactId>maven-jar-plugin</artifactId>
         <configuration>
           <outputDirectory>lib</outputDirectory>
@@ -218,11 +271,11 @@
             <goals>
               <goal>copy-dependencies</goal>
             </goals>
-            <phase>process-resources</phase>
+            <phase>prepare-package</phase>
             <configuration>
               <outputDirectory>../../lib</outputDirectory>
               <!-- just grab the non-provided runtime dependencies -->
-              <includeArtifactIds>commons-collections,commons-configuration,commons-io,commons-lang,jline,log4j,libthrift,commons-jci-core,commons-jci-fam,commons-logging,commons-logging-api,cloudtrace</includeArtifactIds>
+              <includeArtifactIds>commons-collections,commons-configuration,commons-io,commons-lang,jline,log4j,libthrift,commons-jci-core,commons-jci-fam,commons-logging,commons-logging-api</includeArtifactIds>
               <excludeGroupIds>accumulo</excludeGroupIds>
               <excludeTransitive>true</excludeTransitive>
             </configuration>
@@ -231,4 +284,73 @@
       </plugin>
     </plugins>
   </build>
+  <profiles>
+    <profile>
+      <!-- profile for building against Hadoop 1.0.x
+      Activate by not specifying hadoop.profile -->
+      <id>hadoop-1.0</id>
+      <activation>
+        <property>
+          <name>!hadoop.profile</name>
+        </property>
+      </activation>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>build-helper-maven-plugin</artifactId>
+            <version>1.8</version>
+            <executions>
+              <execution>
+                <id>add-test-source</id>
+                <goals>
+                  <goal>add-test-source</goal>
+                </goals>
+                <phase>generate-test-sources</phase>
+                <configuration>
+                  <sources>
+                    <source>src/test/hadoop1</source>
+                  </sources>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+    <profile>
+      <!-- profile for building against Hadoop 2.0.x
+      Activate using: mvn -Dhadoop.profile=2.0 -->
+      <id>hadoop-2.0</id>
+      <activation>
+        <property>
+          <name>hadoop.profile</name>
+          <value>2.0</value>
+        </property>
+      </activation>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>build-helper-maven-plugin</artifactId>
+            <version>1.8</version>
+            <executions>
+              <execution>
+                <id>add-test-source</id>
+                <goals>
+                  <goal>add-test-source</goal>
+                </goals>
+                <phase>generate-test-sources</phase>
+                <configuration>
+                  <sources>
+                    <source>src/test/hadoop2</source>
+                  </sources>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+  </profiles>
 </project>

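Two threads run through this pom change. First, the enforcer now applies the DependencyConvergence rule, which fails the build when two versions of the same artifact meet in the dependency tree; that is what motivates the new exclusions (asm under jersey-server, commons-httpclient under jets3t, commons-digester under lucene-wikipedia, httpcore under libthrift) and the managed accumulo-core/accumulo-trace versions. Second, the new profiles only swap in the matching test sources; per the comments, leaving hadoop.profile unset builds against Hadoop 1.0.x, while mvn -Dhadoop.profile=2.0 builds against Hadoop 2.0.x.
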
http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1d5c80be/query/pom.xml
----------------------------------------------------------------------
diff --git a/query/pom.xml b/query/pom.xml
index 6900919..be6f6b2 100644
--- a/query/pom.xml
+++ b/query/pom.xml
@@ -22,14 +22,13 @@
     <artifactId>accumulo-wikisearch</artifactId>
     <version>1.5.0</version>
   </parent>
-
   <artifactId>wikisearch-query</artifactId>
   <packaging>ejb</packaging>
   <name>wikisearch-query</name>
   <dependencies>
     <dependency>
-      <groupId>com.google.collections</groupId>
-      <artifactId>google-collections</artifactId>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
     </dependency>
     <dependency>
       <groupId>com.google.protobuf</groupId>
@@ -56,10 +55,6 @@
       <artifactId>commons-lang</artifactId>
     </dependency>
     <dependency>
-      <groupId>log4j</groupId>
-      <artifactId>log4j</artifactId>
-    </dependency>
-    <dependency>
       <groupId>org.apache.accumulo</groupId>
       <artifactId>accumulo-core</artifactId>
     </dependency>
@@ -83,6 +78,43 @@
       <scope>provided</scope>
     </dependency>
     <dependency>
+      <groupId>com.googlecode</groupId>
+      <artifactId>minlog</artifactId>
+      <scope>runtime</scope>
+    </dependency>
+    <dependency>
+      <groupId>commons-configuration</groupId>
+      <artifactId>commons-configuration</artifactId>
+      <version>1.6</version>
+      <scope>runtime</scope>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+      <version>2.1</version>
+      <scope>runtime</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.accumulo</groupId>
+      <artifactId>accumulo-fate</artifactId>
+      <scope>runtime</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.accumulo</groupId>
+      <artifactId>accumulo-trace</artifactId>
+      <scope>runtime</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.thrift</groupId>
+      <artifactId>libthrift</artifactId>
+      <scope>runtime</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.zookeeper</groupId>
+      <artifactId>zookeeper</artifactId>
+      <scope>runtime</scope>
+    </dependency>
+    <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
       <scope>test</scope>
@@ -99,11 +131,12 @@
             <goals>
               <goal>copy-dependencies</goal>
             </goals>
-            <phase>process-resources</phase>
+            <phase>prepare-package</phase>
             <configuration>
               <outputDirectory>lib</outputDirectory>
               <!-- just grab the non-provided runtime dependencies -->
-              <includeArtifactIds>commons-lang,commons-codec,protobuf-java,libthrift,zookeeper,hadoop-core,commons-jexl,google-collections,kryo,asm,minlog,reflectasm,wikisearch-ingest,accumulo-core,cloudtrace</includeArtifactIds>
+              <!-- XXX we include guava at the version hadoop 2 provides so that it is also available on hadoop 1 -->
+              <includeArtifactIds>commons-io,commons-configuration,commons-lang,commons-codec,protobuf-java,libthrift,zookeeper,hadoop-core,commons-jexl,guava,kryo,asm,minlog,reflectasm,wikisearch-ingest,accumulo-core,accumulo-fate,accumulo-trace</includeArtifactIds>
               <excludeTransitive>true</excludeTransitive>
             </configuration>
           </execution>
@@ -136,4 +169,70 @@
       </plugin>
     </plugins>
   </build>
+  <profiles>
+    <!-- profile for building against Hadoop 1.0.x
+    Activate by not specifying hadoop.profile -->
+    <profile>
+      <id>hadoop-1.0</id>
+      <activation>
+        <property>
+          <name>!hadoop.profile</name>
+        </property>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-core</artifactId>
+          <version>${hadoop.version}</version>
+        </dependency>
+      </dependencies>
+    </profile>
+    <!-- profile for building against Hadoop 2.0.x
+    Activate using: mvn -Dhadoop.profile=2.0 -->
+    <profile>
+      <id>hadoop-2.0</id>
+      <activation>
+        <property>
+          <name>hadoop.profile</name>
+          <value>2.0</value>
+        </property>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client</artifactId>
+          <version>${hadoop.version}</version>
+          <exclusions>
+            <exclusion>
+              <groupId>org.codehaus.jackson</groupId>
+              <artifactId>jackson-mapper-asl</artifactId>
+            </exclusion>
+          </exclusions>
+        </dependency>
+      </dependencies>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-dependency-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>copy-dependencies</id>
+                <goals>
+                  <goal>copy-dependencies</goal>
+                </goals>
+                <phase>prepare-package</phase>
+                <configuration>
+                  <outputDirectory>lib</outputDirectory>
+                  <!-- just grab the non-provided runtime dependencies -->
+                  <includeArtifactIds>commons-io,commons-configuration,commons-lang,commons-codec,protobuf-java,libthrift,zookeeper,hadoop-client,hadoop-common,hadoop-hdfs,commons-jexl,guava,kryo,asm,minlog,reflectasm,wikisearch-ingest,accumulo-core,accumulo-fate,accumulo-trace</includeArtifactIds>
+                  <excludeTransitive>false</excludeTransitive>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+  </profiles>
 </project>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1d5c80be/query/src/test/hadoop1/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
----------------------------------------------------------------------
diff --git a/query/src/test/hadoop1/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java b/query/src/test/hadoop1/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
new file mode 100644
index 0000000..ac8241e
--- /dev/null
+++ b/query/src/test/hadoop1/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.wikisearch.logic;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map.Entry;
+
+import junit.framework.Assert;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
+import org.apache.accumulo.examples.wikisearch.ingest.WikipediaIngester;
+import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
+import org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper;
+import org.apache.accumulo.examples.wikisearch.parser.RangeCalculator;
+import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
+import org.apache.accumulo.examples.wikisearch.sample.Document;
+import org.apache.accumulo.examples.wikisearch.sample.Field;
+import org.apache.accumulo.examples.wikisearch.sample.Results;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestQueryLogic {
+  
+  private static final String METADATA_TABLE_NAME = "wikiMetadata";
+  
+  private static final String TABLE_NAME = "wiki";
+  
+  private static final String INDEX_TABLE_NAME = "wikiIndex";
+  
+  private static final String RINDEX_TABLE_NAME = "wikiReverseIndex";
+  
+  private static final String TABLE_NAMES[] = {METADATA_TABLE_NAME, TABLE_NAME, RINDEX_TABLE_NAME, INDEX_TABLE_NAME};
+  
+  private class MockAccumuloRecordWriter extends RecordWriter<Text,Mutation> {
+    @Override
+    public void write(Text key, Mutation value) throws IOException, InterruptedException {
+      try {
+        writerMap.get(key).addMutation(value);
+      } catch (MutationsRejectedException e) {
+        throw new IOException("Error adding mutation", e);
+      }
+    }
+    
+    @Override
+    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
+      try {
+        for (BatchWriter w : writerMap.values()) {
+          w.flush();
+          w.close();
+        }
+      } catch (MutationsRejectedException e) {
+        throw new IOException("Error closing Batch Writer", e);
+      }
+    }
+    
+  }
+  
+  private Connector c = null;
+  private Configuration conf = new Configuration();
+  private HashMap<Text,BatchWriter> writerMap = new HashMap<Text,BatchWriter>();
+  private QueryLogic table = null;
+  
+  @Before
+  public void setup() throws Exception {
+    
+    Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.DEBUG);
+    Logger.getLogger(QueryLogic.class).setLevel(Level.DEBUG);
+    Logger.getLogger(RangeCalculator.class).setLevel(Level.DEBUG);
+    
+    conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
+    conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
+    conf.set(WikipediaConfiguration.TABLE_NAME, TABLE_NAME);
+    conf.set(WikipediaConfiguration.NUM_PARTITIONS, "1");
+    conf.set(WikipediaConfiguration.NUM_GROUPS, "1");
+    
+    MockInstance i = new MockInstance();
+    c = i.getConnector("root", new PasswordToken(""));
+    WikipediaIngester.createTables(c.tableOperations(), TABLE_NAME, false);
+    for (String table : TABLE_NAMES) {
+      writerMap.put(new Text(table), c.createBatchWriter(table, 1000L, 1000L, 1));
+    }
+    
+    TaskAttemptID id = new TaskAttemptID();
+    TaskAttemptContext context = new TaskAttemptContext(conf, id);
+    
+    RawLocalFileSystem fs = new RawLocalFileSystem();
+    fs.setConf(conf);
+    
+    URL url = ClassLoader.getSystemResource("enwiki-20110901-001.xml");
+    Assert.assertNotNull(url);
+    File data = new File(url.toURI());
+    Path tmpFile = new Path(data.getAbsolutePath());
+    
+    // Setup the Mapper
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null), 0);
+    AggregatingRecordReader rr = new AggregatingRecordReader();
+    Path ocPath = new Path(tmpFile, "oc");
+    OutputCommitter oc = new FileOutputCommitter(ocPath, context);
+    fs.deleteOnExit(ocPath);
+    StandaloneStatusReporter sr = new StandaloneStatusReporter();
+    rr.initialize(split, context);
+    MockAccumuloRecordWriter rw = new MockAccumuloRecordWriter();
+    WikipediaMapper mapper = new WikipediaMapper();
+    
+    // Load data into Mock Accumulo
+    Mapper<LongWritable,Text,Text,Mutation>.Context con = mapper.new Context(conf, id, rr, rw, oc, sr, split);
+    mapper.run(con);
+    
+    // Flush and close record writers.
+    rw.close(context);
+    
+    table = new QueryLogic();
+    table.setMetadataTableName(METADATA_TABLE_NAME);
+    table.setTableName(TABLE_NAME);
+    table.setIndexTableName(INDEX_TABLE_NAME);
+    table.setReverseIndexTableName(RINDEX_TABLE_NAME);
+    table.setUseReadAheadIterator(false);
+    table.setUnevaluatedFields(Collections.singletonList("TEXT"));
+  }
+  
+  void debugQuery(String tableName) throws Exception {
+    Scanner s = c.createScanner(tableName, new Authorizations("all"));
+    Range r = new Range();
+    s.setRange(r);
+    for (Entry<Key,Value> entry : s)
+      System.out.println(entry.getKey().toString() + " " + entry.getValue().toString());
+  }
+  
+  @Test
+  public void testTitle() throws Exception {
+    Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.OFF);
+    Logger.getLogger(RangeCalculator.class).setLevel(Level.OFF);
+    List<String> auths = new ArrayList<String>();
+    auths.add("enwiki");
+    
+    Results results = table.runQuery(c, auths, "TITLE == 'asphalt' or TITLE == 'abacus' or TITLE == 'acid' or TITLE == 'acronym'", null, null, null);
+    List<Document> docs = results.getResults();
+    assertEquals(4, docs.size());
+    
+    results = table.runQuery(c, auths, "TEXT == 'abacus'", null, null, null);
+    docs = results.getResults();
+    assertEquals(1, docs.size());
+    for (Document doc : docs) {
+      System.out.println("id: " + doc.getId());
+      for (Field field : doc.getFields())
+        System.out.println(field.getFieldName() + " -> " + field.getFieldValue());
+    }
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1d5c80be/query/src/test/hadoop2/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
----------------------------------------------------------------------
diff --git a/query/src/test/hadoop2/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java b/query/src/test/hadoop2/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
new file mode 100644
index 0000000..cbeefd9
--- /dev/null
+++ b/query/src/test/hadoop2/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
@@ -0,0 +1,477 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.wikisearch.logic;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map.Entry;
+
+import junit.framework.Assert;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
+import org.apache.accumulo.examples.wikisearch.ingest.WikipediaIngester;
+import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
+import org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper;
+import org.apache.accumulo.examples.wikisearch.parser.RangeCalculator;
+import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
+import org.apache.accumulo.examples.wikisearch.sample.Document;
+import org.apache.accumulo.examples.wikisearch.sample.Field;
+import org.apache.accumulo.examples.wikisearch.sample.Results;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
+import org.apache.hadoop.mapreduce.task.MapContextImpl;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
+import org.apache.hadoop.conf.Configuration.IntegerRanges;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.MapContext;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.Partitioner;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.TaskType;
+import org.apache.hadoop.security.Credentials;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestQueryLogic {
+  
+  private static final String METADATA_TABLE_NAME = "wikiMetadata";
+  
+  private static final String TABLE_NAME = "wiki";
+  
+  private static final String INDEX_TABLE_NAME = "wikiIndex";
+  
+  private static final String RINDEX_TABLE_NAME = "wikiReverseIndex";
+  
+  private static final String TABLE_NAMES[] = {METADATA_TABLE_NAME, TABLE_NAME, RINDEX_TABLE_NAME, INDEX_TABLE_NAME};
+  
+  private class MockAccumuloRecordWriter extends RecordWriter<Text,Mutation> {
+    @Override
+    public void write(Text key, Mutation value) throws IOException, InterruptedException {
+      try {
+        writerMap.get(key).addMutation(value);
+      } catch (MutationsRejectedException e) {
+        throw new IOException("Error adding mutation", e);
+      }
+    }
+    
+    @Override
+    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
+      try {
+        for (BatchWriter w : writerMap.values()) {
+          w.flush();
+          w.close();
+        }
+      } catch (MutationsRejectedException e) {
+        throw new IOException("Error closing Batch Writer", e);
+      }
+    }
+    
+  }
+  
+  private Connector c = null;
+  private Configuration conf = new Configuration();
+  private HashMap<Text,BatchWriter> writerMap = new HashMap<Text,BatchWriter>();
+  private QueryLogic table = null;
+  
+  @Before
+  public void setup() throws Exception {
+    
+    Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.DEBUG);
+    Logger.getLogger(QueryLogic.class).setLevel(Level.DEBUG);
+    Logger.getLogger(RangeCalculator.class).setLevel(Level.DEBUG);
+    
+    conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
+    conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
+    conf.set(WikipediaConfiguration.TABLE_NAME, TABLE_NAME);
+    conf.set(WikipediaConfiguration.NUM_PARTITIONS, "1");
+    conf.set(WikipediaConfiguration.NUM_GROUPS, "1");
+    
+    MockInstance i = new MockInstance();
+    c = i.getConnector("root", new PasswordToken(""));
+    WikipediaIngester.createTables(c.tableOperations(), TABLE_NAME, false);
+    for (String table : TABLE_NAMES) {
+      writerMap.put(new Text(table), c.createBatchWriter(table, 1000L, 1000L, 1));
+    }
+    
+    TaskAttemptID id = new TaskAttemptID( "fake", 1, TaskType.MAP, 1, 1);
+    TaskAttemptContext context = new TaskAttemptContextImpl(conf, id);
+    
+    RawLocalFileSystem fs = new RawLocalFileSystem();
+    fs.setConf(conf);
+    
+    URL url = ClassLoader.getSystemResource("enwiki-20110901-001.xml");
+    Assert.assertNotNull(url);
+    File data = new File(url.toURI());
+    Path tmpFile = new Path(data.getAbsolutePath());
+    
+    // Setup the Mapper
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null), 0);
+    AggregatingRecordReader rr = new AggregatingRecordReader();
+    Path ocPath = new Path(tmpFile, "oc");
+    OutputCommitter oc = new FileOutputCommitter(ocPath, context);
+    fs.deleteOnExit(ocPath);
+    StandaloneStatusReporter sr = new StandaloneStatusReporter();
+    rr.initialize(split, context);
+    MockAccumuloRecordWriter rw = new MockAccumuloRecordWriter();
+    WikipediaMapper mapper = new WikipediaMapper();
+    
+    // Hadoop 2 made Mapper.Context an abstract class, so building a usable Context for the test means hand-delegating every method to a MapContextImpl.
+    final MapContextImpl<LongWritable,Text,Text,Mutation> mapContext = new MapContextImpl<LongWritable,Text,Text,Mutation>(conf, id, rr, rw, oc, sr, split);
+    // Load data into Mock Accumulo
+    Mapper<LongWritable,Text,Text,Mutation>.Context con = mapper.new Context() {
+      /**
+       * Get the input split for this map.
+       */
+      public InputSplit getInputSplit() {
+        return mapContext.getInputSplit();
+      }
+
+      @Override
+      public LongWritable getCurrentKey() throws IOException, InterruptedException {
+        return mapContext.getCurrentKey();
+      }
+
+      @Override
+      public Text getCurrentValue() throws IOException, InterruptedException {
+        return mapContext.getCurrentValue();
+      }
+
+      @Override
+      public boolean nextKeyValue() throws IOException, InterruptedException {
+        return mapContext.nextKeyValue();
+      }
+
+      @Override
+      public Counter getCounter(Enum<?> counterName) {
+        return mapContext.getCounter(counterName);
+      }
+
+      @Override
+      public Counter getCounter(String groupName, String counterName) {
+        return mapContext.getCounter(groupName, counterName);
+      }
+
+      @Override
+      public OutputCommitter getOutputCommitter() {
+        return mapContext.getOutputCommitter();
+      }
+
+      @Override
+      public void write(Text key, Mutation value) throws IOException,
+          InterruptedException {
+        mapContext.write(key, value);
+      }
+
+      @Override
+      public String getStatus() {
+        return mapContext.getStatus();
+      }
+
+      @Override
+      public TaskAttemptID getTaskAttemptID() {
+        return mapContext.getTaskAttemptID();
+      }
+
+      @Override
+      public void setStatus(String msg) {
+        mapContext.setStatus(msg);
+      }
+
+      @Override
+      public Path[] getArchiveClassPaths() {
+        return mapContext.getArchiveClassPaths();
+      }
+
+      @Override
+      public String[] getArchiveTimestamps() {
+        return mapContext.getArchiveTimestamps();
+      }
+
+      @Override
+      public URI[] getCacheArchives() throws IOException {
+        return mapContext.getCacheArchives();
+      }
+
+      @Override
+      public URI[] getCacheFiles() throws IOException {
+        return mapContext.getCacheFiles();
+      }
+
+      @Override
+      public Class<? extends Reducer<?, ?, ?, ?>> getCombinerClass()
+          throws ClassNotFoundException {
+        return mapContext.getCombinerClass();
+      }
+
+      @Override
+      public Configuration getConfiguration() {
+        return mapContext.getConfiguration();
+      }
+
+      @Override
+      public Path[] getFileClassPaths() {
+        return mapContext.getFileClassPaths();
+      }
+
+      @Override
+      public String[] getFileTimestamps() {
+        return mapContext.getFileTimestamps();
+      }
+
+      @Override
+      public RawComparator<?> getGroupingComparator() {
+        return mapContext.getGroupingComparator();
+      }
+
+      @Override
+      public Class<? extends InputFormat<?, ?>> getInputFormatClass()
+          throws ClassNotFoundException {
+        return mapContext.getInputFormatClass();
+      }
+
+      @Override
+      public String getJar() {
+        return mapContext.getJar();
+      }
+
+      @Override
+      public JobID getJobID() {
+        return mapContext.getJobID();
+      }
+
+      @Override
+      public String getJobName() {
+        return mapContext.getJobName();
+      }
+
+      /*@Override
+      public boolean userClassesTakesPrecedence() {
+        return mapContext.userClassesTakesPrecedence();
+      }*/
+
+      @Override
+      public boolean getJobSetupCleanupNeeded() {
+        return mapContext.getJobSetupCleanupNeeded();
+      }
+
+      @Override
+      public boolean getTaskCleanupNeeded() {
+        return mapContext.getTaskCleanupNeeded();
+      }
+
+      @Override
+      public Path[] getLocalCacheArchives() throws IOException {
+        return mapContext.getLocalCacheArchives();
+      }
+
+      @Override
+      public Path[] getLocalCacheFiles() throws IOException {
+        return mapContext.getLocalCacheFiles();
+      }
+
+      @Override
+      public Class<?> getMapOutputKeyClass() {
+        return mapContext.getMapOutputKeyClass();
+      }
+
+      @Override
+      public Class<?> getMapOutputValueClass() {
+        return mapContext.getMapOutputValueClass();
+      }
+
+      @Override
+      public Class<? extends Mapper<?, ?, ?, ?>> getMapperClass()
+          throws ClassNotFoundException {
+        return mapContext.getMapperClass();
+      }
+
+      @Override
+      public int getMaxMapAttempts() {
+        return mapContext.getMaxMapAttempts();
+      }
+
+      @Override
+      public int getMaxReduceAttempts() {
+        return mapContext.getMaxReduceAttempts();
+      }
+
+      @Override
+      public int getNumReduceTasks() {
+        return mapContext.getNumReduceTasks();
+      }
+
+      @Override
+      public Class<? extends OutputFormat<?, ?>> getOutputFormatClass()
+          throws ClassNotFoundException {
+        return mapContext.getOutputFormatClass();
+      }
+
+      @Override
+      public Class<?> getOutputKeyClass() {
+        return mapContext.getOutputKeyClass();
+      }
+
+      @Override
+      public Class<?> getOutputValueClass() {
+        return mapContext.getOutputValueClass();
+      }
+
+      @Override
+      public Class<? extends Partitioner<?, ?>> getPartitionerClass()
+          throws ClassNotFoundException {
+        return mapContext.getPartitionerClass();
+      }
+
+      @Override
+      public Class<? extends Reducer<?, ?, ?, ?>> getReducerClass()
+          throws ClassNotFoundException {
+        return mapContext.getReducerClass();
+      }
+
+      @Override
+      public RawComparator<?> getSortComparator() {
+        return mapContext.getSortComparator();
+      }
+
+      @Override
+      public boolean getSymlink() {
+        return mapContext.getSymlink();
+      }
+
+      @Override
+      public Path getWorkingDirectory() throws IOException {
+        return mapContext.getWorkingDirectory();
+      }
+
+      @Override
+      public void progress() {
+        mapContext.progress();
+      }
+
+      @Override
+      public boolean getProfileEnabled() {
+        return mapContext.getProfileEnabled();
+      }
+
+      @Override
+      public String getProfileParams() {
+        return mapContext.getProfileParams();
+      }
+
+      @Override
+      public IntegerRanges getProfileTaskRange(boolean isMap) {
+        return mapContext.getProfileTaskRange(isMap);
+      }
+
+      @Override
+      public String getUser() {
+        return mapContext.getUser();
+      }
+
+      @Override
+      public Credentials getCredentials() {
+        return mapContext.getCredentials();
+      }
+      
+      @Override
+      public float getProgress() {
+        return mapContext.getProgress();
+      }
+    };
+
+    mapper.run(con);
+    
+    // Flush and close record writers.
+    rw.close(context);
+    
+    table = new QueryLogic();
+    table.setMetadataTableName(METADATA_TABLE_NAME);
+    table.setTableName(TABLE_NAME);
+    table.setIndexTableName(INDEX_TABLE_NAME);
+    table.setReverseIndexTableName(RINDEX_TABLE_NAME);
+    table.setUseReadAheadIterator(false);
+    table.setUnevaluatedFields(Collections.singletonList("TEXT"));
+  }
+  
+  void debugQuery(String tableName) throws Exception {
+    Scanner s = c.createScanner(tableName, new Authorizations("all"));
+    Range r = new Range();
+    s.setRange(r);
+    for (Entry<Key,Value> entry : s)
+      System.out.println(entry.getKey().toString() + " " + entry.getValue().toString());
+  }
+  
+  @Test
+  public void testTitle() throws Exception {
+    Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.OFF);
+    Logger.getLogger(RangeCalculator.class).setLevel(Level.OFF);
+    List<String> auths = new ArrayList<String>();
+    auths.add("enwiki");
+    
+    Results results = table.runQuery(c, auths, "TITLE == 'asphalt' or TITLE == 'abacus' or TITLE == 'acid' or TITLE == 'acronym'", null, null, null);
+    List<Document> docs = results.getResults();
+    assertEquals(4, docs.size());
+    
+    results = table.runQuery(c, auths, "TEXT == 'abacus'", null, null, null);
+    docs = results.getResults();
+    assertEquals(1, docs.size());
+    for (Document doc : docs) {
+      System.out.println("id: " + doc.getId());
+      for (Field field : doc.getFields())
+        System.out.println(field.getFieldName() + " -> " + field.getFieldValue());
+    }
+  }
+  
+}

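The Hadoop 1 and Hadoop 2 copies of this test differ only in how the Mapper.Context is obtained: Hadoop 1 exposes a concrete constructor (mapper.new Context(conf, id, rr, rw, oc, sr, split)), while Hadoop 2 made Context abstract, hence the delegation above. Hadoop 2 also ships an adapter that removes most of that boilerplate; a minimal sketch, assuming the 2.0.x WrappedMapper API and the mapContext already built in setup():

    // Hypothetical alternative to the anonymous Context above, via
    // org.apache.hadoop.mapreduce.lib.map.WrappedMapper:
    Mapper<LongWritable,Text,Text,Mutation>.Context con =
        new WrappedMapper<LongWritable,Text,Text,Mutation>().getMapContext(mapContext);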

[02/50] [abbrv] git commit: ACCUMULO-381

Posted by uj...@apache.org.
ACCUMULO-381

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1241941 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/dfe26ba1
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/dfe26ba1
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/dfe26ba1

Branch: refs/heads/master
Commit: dfe26ba11ff25ce3e4c283447bebae887303a936
Parents: 57bf9cf
Author: Adam Fuchs <af...@apache.org>
Authored: Wed Feb 8 15:37:21 2012 +0000
Committer: Adam Fuchs <af...@apache.org>
Committed: Wed Feb 8 15:37:21 2012 +0000

----------------------------------------------------------------------
 README                        |  3 +--
 README.parallel               |  3 +--
 ingest/bin/ingest.sh          | 30 +-----------------------------
 ingest/bin/ingest_parallel.sh | 30 +-----------------------------
 ingest/pom.xml                | 14 ++++++++++++--
 5 files changed, 16 insertions(+), 64 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/dfe26ba1/README
----------------------------------------------------------------------
diff --git a/README b/README
index b4391eb..4c06eda 100644
--- a/README
+++ b/README
@@ -8,8 +8,7 @@
  	Prerequisites
  	-------------
  	1. Accumulo, Hadoop, and ZooKeeper must be installed and running
- 	2. ACCUMULO_HOME and ZOOKEEPER_HOME must be defined in the environment
- 	3. One or more wikipedia dump files (http://dumps.wikimedia.org/backup-index.html) placed in an HDFS directory.
+ 	2. One or more wikipedia dump files (http://dumps.wikimedia.org/backup-index.html) placed in an HDFS directory.
 	   You will want to grab the files with the link name of pages-articles.xml.bz2
  
  

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/dfe26ba1/README.parallel
----------------------------------------------------------------------
diff --git a/README.parallel b/README.parallel
index 18cab71..04c95f4 100644
--- a/README.parallel
+++ b/README.parallel
@@ -8,8 +8,7 @@
  	Prerequisites
  	-------------
  	1. Accumulo, Hadoop, and ZooKeeper must be installed and running
- 	2. ACCUMULO_HOME and ZOOKEEPER_HOME must be defined in the environment
- 	3. One or more wikipedia dump files (http://dumps.wikimedia.org/backup-index.html) placed in an HDFS directory.
+ 	2. One or more wikipedia dump files (http://dumps.wikimedia.org/backup-index.html) placed in an HDFS directory.
 	     You will want to grab the files with the link name of pages-articles.xml.bz2
  
  

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/dfe26ba1/ingest/bin/ingest.sh
----------------------------------------------------------------------
diff --git a/ingest/bin/ingest.sh b/ingest/bin/ingest.sh
index 2759669..f743412 100755
--- a/ingest/bin/ingest.sh
+++ b/ingest/bin/ingest.sh
@@ -22,34 +22,6 @@ SCRIPT_DIR="${THIS_SCRIPT%/*}"
 SCRIPT_DIR=`cd $SCRIPT_DIR ; pwd`
 echo $SCRIPT_DIR
 
-ACCUMULO_HOME=${ACCUMULO_HOME}
-ZOOKEEPER_HOME=${ZOOKEEPER_HOME}
-
-#
-# Check ZOOKEEPER_HOME
-#
-if [[ -z $ZOOKEEPER_HOME ]]; then
-	echo "You must set ZOOKEEPER_HOME environment variable"
-	exit -1;
-else
-	for f in $ZOOKEEPER_HOME/zookeeper-*.jar; do
-		CLASSPATH=$f
-		break
-	done	
-fi
-
-#
-# Check ACCUMULO_HOME
-#
-if [[ -z $ACCUMULO_HOME ]]; then
-	echo "You must set ACCUMULO_HOME environment variable"
-	exit -1;
-else
-	for f in $ACCUMULO_HOME/lib/*.jar; do
-		CLASSPATH=${CLASSPATH}:$f
-	done	
-fi
-
 #
 # Add our jars
 #
@@ -60,7 +32,7 @@ done
 #
 # Transform the classpath into a comma-separated list also
 #
-LIBJARS=`echo $CLASSPATH | sed 's/:/,/g'`
+LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 
 
 #

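The extra sed matters because the surrounding loop builds CLASSPATH by appending ":$f"; with the ACCUMULO_HOME/ZOOKEEPER_HOME seeding gone, the variable now starts with a stray colon, and without stripping it the -libjars list would begin with an empty entry. A minimal sketch of the same normalization, written in Java purely for illustration (the script itself stays in sed):

    // Illustrative mirror of `sed 's/^://' | sed 's/:/,/g'`.
    String classpath = ":lib/a.jar:lib/b.jar";          // leading ':' from the build loop
    String libjars = classpath.replaceFirst("^:", "")   // drop the stray separator
                              .replace(':', ',');       // colon list to comma list
    // libjars is now "lib/a.jar,lib/b.jar"
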
http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/dfe26ba1/ingest/bin/ingest_parallel.sh
----------------------------------------------------------------------
diff --git a/ingest/bin/ingest_parallel.sh b/ingest/bin/ingest_parallel.sh
index e921494..74dce9c 100755
--- a/ingest/bin/ingest_parallel.sh
+++ b/ingest/bin/ingest_parallel.sh
@@ -22,34 +22,6 @@ SCRIPT_DIR="${THIS_SCRIPT%/*}"
 SCRIPT_DIR=`cd $SCRIPT_DIR ; pwd`
 echo $SCRIPT_DIR
 
-ACCUMULO_HOME=${ACCUMULO_HOME}
-ZOOKEEPER_HOME=${ZOOKEEPER_HOME}
-
-#
-# Check ZOOKEEPER_HOME
-#
-if [[ -z $ZOOKEEPER_HOME ]]; then
-	echo "You must set ZOOKEEPER_HOME environment variable"
-	exit -1;
-else
-	for f in $ZOOKEEPER_HOME/zookeeper-*.jar; do
-		CLASSPATH=$f
-		break
-	done	
-fi
-
-#
-# Check ACCUMULO_HOME
-#
-if [[ -z $ACCUMULO_HOME ]]; then
-	echo "You must set ACCUMULO_HOME environment variable"
-	exit -1;
-else
-	for f in $ACCUMULO_HOME/lib/*.jar; do
-		CLASSPATH=${CLASSPATH}:$f
-	done	
-fi
-
 #
 # Add our jars
 #
@@ -60,7 +32,7 @@ done
 #
 # Transform the classpath into a comma-separated list also
 #
-LIBJARS=`echo $CLASSPATH | sed 's/:/,/g'`
+LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 
 
 #

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/dfe26ba1/ingest/pom.xml
----------------------------------------------------------------------
diff --git a/ingest/pom.xml b/ingest/pom.xml
index 68e4fb9..ac123c7 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -76,6 +76,16 @@
     	<groupId>com.sun.jersey</groupId>
     	<artifactId>jersey-server</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.apache.accumulo</groupId>
+      <artifactId>cloudtrace</artifactId>
+      <scope>runtime</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.thrift</groupId>
+      <artifactId>libthrift</artifactId>
+      <scope>runtime</scope>
+    </dependency>
   </dependencies>
 
   <build>
@@ -93,8 +103,8 @@
             <configuration>
               <outputDirectory>lib</outputDirectory>
               <!-- just grab the non-provided runtime dependencies -->
-              <includeArtifactIds>commons-lang,google-collections,lucene-core,lucene-analyzers,lucene-wikipedia,protobuf-java</includeArtifactIds>
-              <excludeTransitive>true</excludeTransitive>
+              <includeArtifactIds>commons-lang,google-collections,lucene-core,lucene-analyzers,lucene-wikipedia,protobuf-java,accumulo-core,hadoop-core,libthrift,cloudtrace,zookeeper</includeArtifactIds>
+              <excludeTransitive>false</excludeTransitive>
             </configuration>
           </execution>
         </executions>

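In short, this commit trades environment-variable classpaths for build-time copies: since the scripts no longer assemble CLASSPATH from ACCUMULO_HOME and ZOOKEEPER_HOME, the maven-dependency-plugin now copies accumulo-core, hadoop-core, libthrift, cloudtrace, and zookeeper (with transitive dependencies, hence excludeTransitive=false) into lib/, where the scripts' "Add our jars" loop finds them.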

[17/50] [abbrv] git commit: ACCUMULO-471 document the ability to run over uncompressed data; allow the input to be split; don't send millions of duplicate metadata table entries

Posted by uj...@apache.org.
ACCUMULO-471 document the ability to run over uncompressed data; allow the input to be split; don't send millions of duplicate metadata table entries

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1302537 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/2c1666fd
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/2c1666fd
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/2c1666fd

Branch: refs/heads/master
Commit: 2c1666fd4fc9d696d612de2aac922f26e9d7116f
Parents: 66bb45c
Author: Eric C. Newton <ec...@apache.org>
Authored: Mon Mar 19 16:47:41 2012 +0000
Committer: Eric C. Newton <ec...@apache.org>
Committed: Mon Mar 19 16:47:41 2012 +0000

----------------------------------------------------------------------
 README                                          |  7 ++++--
 .../wikisearch/ingest/WikipediaInputFormat.java |  6 -----
 .../wikisearch/ingest/WikipediaMapper.java      | 23 ++++++++++++++------
 3 files changed, 21 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/2c1666fd/README
----------------------------------------------------------------------
diff --git a/README b/README
index daec8e4..43077a7 100644
--- a/README
+++ b/README
@@ -11,7 +11,10 @@
  	1. Accumulo, Hadoop, and ZooKeeper must be installed and running
  	2. One or more wikipedia dump files (http://dumps.wikimedia.org/backup-index.html) placed in an HDFS directory.
 	   You will want to grab the files with the link name of pages-articles.xml.bz2
- 
+        3. Though not strictly required, the ingest will go more quickly if the files are decompressed:
+
+            $ bunzip2 < enwiki-*-pages-articles.xml.bz2 | hadoop fs -put - /wikipedia/enwiki-pages-articles.xml
+
  
  	INSTRUCTIONS
  	------------
@@ -70,4 +73,4 @@
 	log4j.logger.org.apache.accumulo.examples.wikisearch.iterator=INFO,A1
 	
 	This needs to be propagated to all the tablet server nodes, and accumulo needs to be restarted.
-	
\ No newline at end of file
+	

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/2c1666fd/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java
index e682f2f..c582cbf 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java
@@ -133,10 +133,4 @@ public class WikipediaInputFormat extends TextInputFormat {
   public RecordReader<LongWritable,Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
     return new AggregatingRecordReader();
   }
-  
-  @Override
-  protected boolean isSplitable(JobContext context, Path file) {
-    return false;
-  }
-  
 }

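Dropping the isSplitable override restores TextInputFormat's default, roughly the following (paraphrased from Hadoop 1, not this project's code):

    // Inherited behavior once the override is gone: plain-text inputs may be
    // split across mappers, while compressed files are processed whole,
    // which is why the README now suggests decompressing the dumps first.
    protected boolean isSplitable(JobContext context, Path file) {
      CompressionCodec codec =
          new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
      return codec == null;
    }
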
http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/2c1666fd/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
index fc328cc..8565b09 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
@@ -119,6 +119,8 @@ public class WikipediaMapper extends Mapper<LongWritable,Text,Text,Mutation> {
     return article.getId() % numPartitions;
   }
   
+  static HashSet<String> metadataSent = new HashSet<String>();
+
   @Override
   protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
     Article article = extractor.extract(new InputStreamReader(new ByteArrayInputStream(value.getBytes()), UTF8));
@@ -137,9 +139,13 @@ public class WikipediaMapper extends Mapper<LongWritable,Text,Text,Mutation> {
       for (Entry<String,Object> entry : article.getFieldValues().entrySet()) {
         m.put(colfPrefix + article.getId(), entry.getKey() + NULL_BYTE + entry.getValue().toString(), cv, article.getTimestamp(), NULL_VALUE);
         // Create mutations for the metadata table.
-        Mutation mm = new Mutation(entry.getKey());
-        mm.put(METADATA_EVENT_COLUMN_FAMILY, language, cv, article.getTimestamp(), NULL_VALUE);
-        context.write(metadataTableName, mm);
+        String metadataKey = entry.getKey() + METADATA_EVENT_COLUMN_FAMILY + language;
+        if (!metadataSent.contains(metadataKey)) {
+          Mutation mm = new Mutation(entry.getKey());
+          mm.put(METADATA_EVENT_COLUMN_FAMILY, language, cv, article.getTimestamp(), NULL_VALUE);
+          context.write(metadataTableName, mm);
+          metadataSent.add(metadataKey);
+        }
       }
       
       // Tokenize the content
@@ -182,10 +188,13 @@ public class WikipediaMapper extends Mapper<LongWritable,Text,Text,Mutation> {
         context.write(reverseIndexTableName, grm);
         
         // Create mutations for the metadata table.
-        Mutation mm = new Mutation(index.getKey());
-        mm.put(METADATA_INDEX_COLUMN_FAMILY, language + NULL_BYTE + LcNoDiacriticsNormalizer.class.getName(), cv, article.getTimestamp(), NULL_VALUE);
-        context.write(metadataTableName, mm);
-        
+        String metadataKey = index.getKey() + METADATA_INDEX_COLUMN_FAMILY + language;
+        if (!metadataSent.contains(metadataKey)) {
+          Mutation mm = new Mutation(index.getKey());
+          mm.put(METADATA_INDEX_COLUMN_FAMILY, language + NULL_BYTE + LcNoDiacriticsNormalizer.class.getName(), cv, article.getTimestamp(), NULL_VALUE);
+          context.write(metadataTableName, mm);
+          metadataSent.add(metadataKey);
+        }
       }
       // Add the entire text to the document section of the table.
       // row is the partition, colf is 'd', colq is language\0articleid, value is Base64 encoded GZIP'd document

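The dedup above keys a static HashSet on key plus column family plus language, so it is scoped to one task JVM: it stops a mapper from re-sending the same metadata entry for every article, though separate tasks may still emit overlapping entries. That is acceptable here because identical metadata rows simply collapse in Accumulo; the point is cutting mutation volume, not guaranteeing global uniqueness. A minimal sketch of the pattern, with illustrative names:

    // Sketch of the JVM-scoped dedup used above (names are illustrative).
    final class MetadataDedup {
      // static in the mapper, so it survives across map() calls in one task
      private static final java.util.Set<String> sent =
          new java.util.HashSet<String>();

      static boolean firstTime(String key, String family, String language) {
        // Set.add returns false when the element is already present
        return sent.add(key + '\0' + family + '\0' + language);
      }
    }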

[12/50] [abbrv] git commit: ACCUMULO-242 added many references to apache accumulo - merged to 1.4

Posted by uj...@apache.org.
ACCUMULO-242 added many references to apache accumulo - merged to 1.4

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1245655 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/2e366aa2
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/2e366aa2
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/2e366aa2

Branch: refs/heads/master
Commit: 2e366aa289e6e4f7f26d3b8e52e3d0b9a94ce8b8
Parents: 27fa06e
Author: Billie Rinaldi <bi...@apache.org>
Authored: Fri Feb 17 17:06:06 2012 +0000
Committer: Billie Rinaldi <bi...@apache.org>
Committed: Fri Feb 17 17:06:06 2012 +0000

----------------------------------------------------------------------
 README          | 1 +
 README.parallel | 1 +
 2 files changed, 2 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/2e366aa2/README
----------------------------------------------------------------------
diff --git a/README b/README
index 4c06eda..4844fe6 100644
--- a/README
+++ b/README
@@ -1,3 +1,4 @@
+ Apache Accumulo Wikipedia Search Example
 
  This project contains a sample application for ingesting and querying wikipedia data.
  

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/2e366aa2/README.parallel
----------------------------------------------------------------------
diff --git a/README.parallel b/README.parallel
index 04c95f4..477556b 100644
--- a/README.parallel
+++ b/README.parallel
@@ -1,3 +1,4 @@
+ Apache Accumulo Wikipedia Search Example (parallel version)
 
  This project contains a sample application for ingesting and querying wikipedia data.
  


[32/50] [abbrv] git commit: Revert "ACCUMULO-1546 updating pom version for 1.4.4 release"

Posted by uj...@apache.org.
Revert "ACCUMULO-1546 updating pom version for 1.4.4 release"

This reverts commit cd15fe21a604323745b1ca4b38da0ae14e9c6e90.


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/a208a7cf
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/a208a7cf
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/a208a7cf

Branch: refs/heads/master
Commit: a208a7cf355af80596c537ad9d8898326fbda0fc
Parents: df29719
Author: Mike Drob <md...@mdrob.com>
Authored: Thu Aug 15 01:15:47 2013 -0400
Committer: Mike Drob <md...@mdrob.com>
Committed: Thu Aug 15 01:15:47 2013 -0400

----------------------------------------------------------------------
 ingest/pom.xml    | 2 +-
 pom.xml           | 2 +-
 query-war/pom.xml | 2 +-
 query/pom.xml     | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/a208a7cf/ingest/pom.xml
----------------------------------------------------------------------
diff --git a/ingest/pom.xml b/ingest/pom.xml
index e84278d..f2a8f77 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.4</version>
+    <version>1.4.4-SNAPSHOT</version>
     <relativePath>../</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/a208a7cf/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 003b0fd..1036436 100644
--- a/pom.xml
+++ b/pom.xml
@@ -20,7 +20,7 @@
   <parent>
         <artifactId>accumulo-examples</artifactId>
         <groupId>org.apache.accumulo</groupId>
-        <version>1.4.4</version>
+        <version>1.4.4-SNAPSHOT</version>
         <relativePath>../</relativePath>
   </parent>
   <artifactId>accumulo-wikisearch</artifactId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/a208a7cf/query-war/pom.xml
----------------------------------------------------------------------
diff --git a/query-war/pom.xml b/query-war/pom.xml
index a425f7c..555f995 100644
--- a/query-war/pom.xml
+++ b/query-war/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.4</version>
+    <version>1.4.4-SNAPSHOT</version>
   </parent>
 
   <artifactId>wikisearch-query-war</artifactId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/a208a7cf/query/pom.xml
----------------------------------------------------------------------
diff --git a/query/pom.xml b/query/pom.xml
index b5ce0b0..9d20269 100644
--- a/query/pom.xml
+++ b/query/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.4</version>
+    <version>1.4.4-SNAPSHOT</version>
     <relativePath>../</relativePath>
   </parent>
 


[19/50] [abbrv] git commit: ACCUMULO-477 use consistent names for options on IteratorSettings

Posted by uj...@apache.org.
ACCUMULO-477 use consistent names for options on IteratorSettings

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1303034 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/e7e79286
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/e7e79286
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/e7e79286

Branch: refs/heads/master
Commit: e7e792866aabd54042e488c0327f590f29993560
Parents: 5333773
Author: Eric C. Newton <ec...@apache.org>
Authored: Tue Mar 20 17:36:34 2012 +0000
Committer: Eric C. Newton <ec...@apache.org>
Committed: Tue Mar 20 17:36:34 2012 +0000

----------------------------------------------------------------------
 .../accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e7e79286/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java
----------------------------------------------------------------------
diff --git a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java
index f2b1e85..6619ede 100644
--- a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java
+++ b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java
@@ -157,7 +157,7 @@ public class GlobalIndexUidTest {
     IteratorSetting setting = new IteratorSetting(1, GlobalIndexUidCombiner.class);
     GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
     GlobalIndexUidCombiner.setLossyness(setting, true);
-    comb.init(null, setting.getProperties(), null);
+    comb.init(null, setting.getOptions(), null);
     Logger.getLogger(GlobalIndexUidCombiner.class).setLevel(Level.OFF);
     Value val = new Value(UUID.randomUUID().toString().getBytes());
     values.add(val);

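For reference, the renamed accessor in context; a short sketch mirroring the test above (assuming the 1.4 IteratorSetting API, where comb is the combiner instance under test and java.util.Map is imported):

    // IteratorSetting stores iterator options; getOptions() (formerly
    // getProperties()) returns them as the Map<String,String> expected
    // by SortedKeyValueIterator.init().
    IteratorSetting setting = new IteratorSetting(1, GlobalIndexUidCombiner.class);
    GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
    GlobalIndexUidCombiner.setLossyness(setting, true);
    Map<String,String> options = setting.getOptions();
    comb.init(null, options, null);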

[42/50] [abbrv] git commit: Have builds working with Hadoop 1. There's a test compilation failure with Hadoop 2, so use -Dmaven.test.skip=true when building against Hadoop 2 for now

Posted by uj...@apache.org.
Have builds working with Hadoop 1. There's a test compilation failure with Hadoop 2, so use -Dmaven.test.skip=true when building against Hadoop 2 for now


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/0ef257a8
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/0ef257a8
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/0ef257a8

Branch: refs/heads/master
Commit: 0ef257a8035e3a58b1e2a866cada073e7f4b6c22
Parents: f11759e e158590
Author: Bill Slacum <bi...@koverse.com>
Authored: Fri Feb 7 01:13:08 2014 -0500
Committer: Bill Slacum <bi...@koverse.com>
Committed: Fri Feb 7 01:13:08 2014 -0500

----------------------------------------------------------------------
 accumulo-wikisearch.iml                         | 14 +++++++
 ingest/pom.xml                                  | 40 ++++++++++++--------
 pom.xml                                         |  4 +-
 query-war/pom.xml                               |  4 +-
 query/pom.xml                                   |  5 ++-
 .../wikisearch/iterator/AndIterator.java        |  1 -
 .../wikisearch/logic/TestQueryLogic.java        |  2 +-
 7 files changed, 48 insertions(+), 22 deletions(-)
----------------------------------------------------------------------

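In practice this means two invocations: mvn package builds against Hadoop 1 (the hadoop-1.0 profile activates when hadoop.profile is unset), while mvn package -Dhadoop.profile=2.0 -Dmaven.test.skip=true builds against Hadoop 2 until the test compilation failure mentioned above is fixed.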

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0ef257a8/accumulo-wikisearch.iml
----------------------------------------------------------------------
diff --cc accumulo-wikisearch.iml
index 0000000,0000000..8015fa7
new file mode 100644
--- /dev/null
+++ b/accumulo-wikisearch.iml
@@@ -1,0 -1,0 +1,14 @@@
++<?xml version="1.0" encoding="UTF-8"?>
++<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
++  <component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_6" inherit-compiler-output="false">
++    <output url="file://$MODULE_DIR$/target/classes" />
++    <output-test url="file://$MODULE_DIR$/target/test-classes" />
++    <exclude-output />
++    <content url="file://$MODULE_DIR$">
++      <excludeFolder url="file://$MODULE_DIR$/target" />
++    </content>
++    <orderEntry type="inheritedJdk" />
++    <orderEntry type="sourceFolder" forTests="false" />
++  </component>
++</module>
++

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0ef257a8/ingest/pom.xml
----------------------------------------------------------------------
diff --cc ingest/pom.xml
index d15f057,31d7110..fdf08e9
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@@ -1,57 -1,56 +1,59 @@@
 -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 +<?xml version="1.0" encoding="UTF-8"?>
- <!--
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
+   <!--
+     Licensed to the Apache Software Foundation (ASF) under one or more
+     contributor license agreements. See the NOTICE file distributed with
+     this work for additional information regarding copyright ownership.
+     The ASF licenses this file to You under the Apache License, Version 2.0
+     (the "License"); you may not use this file except in compliance with
+     the License. You may obtain a copy of the License at
  
-       http://www.apache.org/licenses/LICENSE-2.0
+     http://www.apache.org/licenses/LICENSE-2.0
  
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
- -->
+     Unless required by applicable law or agreed to in writing, software
+     distributed under the License is distributed on an "AS IS" BASIS,
+     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     See the License for the specific language governing permissions and
+     limitations under the License.
+   -->
 -
 +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
 -    <artifactId>accumulo-wikisearch</artifactId>
      <groupId>org.apache.accumulo</groupId>
 -    <version>1.4.5-SNAPSHOT</version>
 -    <relativePath>../</relativePath>
 +    <artifactId>accumulo-wikisearch</artifactId>
-     <version>1.5.0-SNAPSHOT</version>
++    <version>1.5.0</version>
    </parent>
+ 
    <artifactId>wikisearch-ingest</artifactId>
    <name>wikisearch-ingest</name>
+ 
    <dependencies>
      <dependency>
 -      <groupId>org.apache.zookeeper</groupId>
 -      <artifactId>zookeeper</artifactId>
 +      <groupId>com.google.collections</groupId>
 +      <artifactId>google-collections</artifactId>
      </dependency>
      <dependency>
 -      <groupId>org.apache.accumulo</groupId>
 -      <artifactId>accumulo-core</artifactId>
 +      <groupId>com.google.protobuf</groupId>
 +      <artifactId>protobuf-java</artifactId>
      </dependency>
      <dependency>
 -      <groupId>org.apache.accumulo</groupId>
 -      <artifactId>accumulo-start</artifactId>
 +      <groupId>commons-codec</groupId>
 +      <artifactId>commons-codec</artifactId>
 +    </dependency>
 +    <dependency>
 +      <groupId>commons-lang</groupId>
 +      <artifactId>commons-lang</artifactId>
      </dependency>
      <dependency>
        <groupId>log4j</groupId>
        <artifactId>log4j</artifactId>
      </dependency>
      <dependency>
 -      <groupId>commons-lang</groupId>
 -      <artifactId>commons-lang</artifactId>
 +      <groupId>org.apache.accumulo</groupId>
 +      <artifactId>accumulo-core</artifactId>
      </dependency>
      <dependency>
 -      <groupId>com.google.collections</groupId>
 -      <artifactId>google-collections</artifactId>
 +      <groupId>org.apache.hadoop</groupId>
-       <artifactId>hadoop-core</artifactId>
++      <artifactId>hadoop-client</artifactId>
      </dependency>
      <dependency>
        <groupId>org.apache.lucene</groupId>
@@@ -59,9 -58,36 +61,15 @@@
      </dependency>
      <dependency>
        <groupId>org.apache.lucene</groupId>
 -      <artifactId>lucene-analyzers</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-wikipedia</artifactId>
      </dependency>
+     <dependency>
 -      <groupId>com.google.protobuf</groupId>
 -      <artifactId>protobuf-java</artifactId>
 -    </dependency>
 -    <dependency>
 -    	<groupId>com.sun.jersey</groupId>
 -    	<artifactId>jersey-server</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>org.apache.accumulo</groupId>
 -      <artifactId>cloudtrace</artifactId>
 -      <scope>runtime</scope>
 -    </dependency>
 -    <dependency>
 -      <groupId>org.apache.thrift</groupId>
 -      <artifactId>libthrift</artifactId>
 -      <scope>runtime</scope>
 -    </dependency>
 -    <dependency>
 -      <groupId>commons-codec</groupId>
 -      <artifactId>commons-codec</artifactId>
++      <groupId>junit</groupId>
++      <artifactId>junit</artifactId>
++      <scope>test</scope>
+     </dependency>
    </dependencies>
+ 
    <build>
      <plugins>
        <plugin>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0ef257a8/pom.xml
----------------------------------------------------------------------
diff --cc pom.xml
index 6a4fd39,1ea9a2a..919b08b
--- a/pom.xml
+++ b/pom.xml
@@@ -18,9 -18,10 +18,9 @@@
  <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
 -        <artifactId>accumulo-examples</artifactId>
 -        <groupId>org.apache.accumulo</groupId>
 -        <version>1.4.5-SNAPSHOT</version>
 -        <relativePath>../</relativePath>
 +    <groupId>org.apache.accumulo</groupId>
 +    <artifactId>accumulo-examples</artifactId>
-     <version>1.4.3</version>
++    <version>1.5.0</version>
    </parent>
    <artifactId>accumulo-wikisearch</artifactId>
    <packaging>pom</packaging>
@@@ -128,12 -42,18 +129,13 @@@
        <snapshots>
          <enabled>false</enabled>
        </snapshots>
 -    </repository>
 -    <repository>
        <id>java.net</id>
        <name>java.net</name>
 -      <layout>default</layout>
        <url>https://maven.java.net/content/groups/public</url>
 -      <snapshots>
 -        <enabled>false</enabled>
 -      </snapshots>
 +      <layout>default</layout>
      </repository>
    </repositories>
+ 
    <build>
      <defaultGoal>package</defaultGoal>
      <plugins>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0ef257a8/query-war/pom.xml
----------------------------------------------------------------------
diff --cc query-war/pom.xml
index 56f260a,485d584..be6e807
--- a/query-war/pom.xml
+++ b/query-war/pom.xml
@@@ -18,10 -18,11 +18,11 @@@
  <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
 -    <artifactId>accumulo-wikisearch</artifactId>
      <groupId>org.apache.accumulo</groupId>
 -    <version>1.4.5-SNAPSHOT</version>
 +    <artifactId>accumulo-wikisearch</artifactId>
-     <version>1.5.0-SNAPSHOT</version>
++    <version>1.5.0</version>
    </parent>
+ 
    <artifactId>wikisearch-query-war</artifactId>
    <packaging>war</packaging>
    <name>wikisearch-query-war</name>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0ef257a8/query/pom.xml
----------------------------------------------------------------------
diff --cc query/pom.xml
index 804a992,8c6e6d3..6900919
--- a/query/pom.xml
+++ b/query/pom.xml
@@@ -16,12 -16,13 +16,13 @@@
    limitations under the License.
  -->
  <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 +  <modelVersion>4.0.0</modelVersion>
    <parent>
 -    <artifactId>accumulo-wikisearch</artifactId>
      <groupId>org.apache.accumulo</groupId>
 -    <version>1.4.5-SNAPSHOT</version>
 -    <relativePath>../</relativePath>
 +    <artifactId>accumulo-wikisearch</artifactId>
-     <version>1.5.0-SNAPSHOT</version>
++    <version>1.5.0</version>
    </parent>
+ 
    <artifactId>wikisearch-query</artifactId>
    <packaging>ejb</packaging>
    <name>wikisearch-query</name>
@@@ -68,23 -75,20 +69,23 @@@
        <version>${project.version}</version>
      </dependency>
      <dependency>
 -      <groupId>com.googlecode</groupId>
 -      <artifactId>minlog</artifactId>
 +      <groupId>org.apache.commons</groupId>
 +      <artifactId>commons-jexl</artifactId>
      </dependency>
      <dependency>
 -      <groupId>com.google.protobuf</groupId>
 -      <artifactId>protobuf-java</artifactId>
 +      <groupId>org.apache.hadoop</groupId>
-       <artifactId>hadoop-core</artifactId>
++      <artifactId>hadoop-client</artifactId>
      </dependency>
      <dependency>
 -      <groupId>commons-lang</groupId>
 -      <artifactId>commons-lang</artifactId>
 +      <groupId>javaee</groupId>
 +      <artifactId>javaee-api</artifactId>
 +      <version>5</version>
 +      <scope>provided</scope>
      </dependency>
      <dependency>
 -      <groupId>commons-codec</groupId>
 -      <artifactId>commons-codec</artifactId>
 +      <groupId>junit</groupId>
 +      <artifactId>junit</artifactId>
 +      <scope>test</scope>
      </dependency>
    </dependencies>
    <build>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0ef257a8/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0ef257a8/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
----------------------------------------------------------------------
diff --cc query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
index ebc909e,24e7379..ac8241e
--- a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
+++ b/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
@@@ -127,9 -126,8 +127,9 @@@ public class TestQueryLogic 
        writerMap.put(new Text(table), c.createBatchWriter(table, 1000L, 1000L, 1));
      }
      
 -    TaskAttemptContext context = ContextFactory.createTaskAttemptContext(conf);
 +    TaskAttemptID id = new TaskAttemptID();
 +    TaskAttemptContext context = new TaskAttemptContext(conf, id);
- 
+     
      RawLocalFileSystem fs = new RawLocalFileSystem();
      fs.setConf(conf);
      


[10/50] [abbrv] git commit: ACCUMULO-375 it is important to close a MultiTableBatchWriter when you are done with it

Posted by uj...@apache.org.
ACCUMULO-375 it is important to close a MultiTableBatchWriter when you are done with it

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1245170 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/1e05129d
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/1e05129d
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/1e05129d

Branch: refs/heads/master
Commit: 1e05129dfb96a0c6c78d7324fe4b3a73e8d39ac5
Parents: ec56d2d
Author: Adam Fuchs <af...@apache.org>
Authored: Thu Feb 16 20:46:37 2012 +0000
Committer: Adam Fuchs <af...@apache.org>
Committed: Thu Feb 16 20:46:37 2012 +0000

----------------------------------------------------------------------
 .../examples/wikisearch/ingest/WikipediaPartitionedMapper.java | 6 ++++++
 1 file changed, 6 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1e05129d/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java
index 7816b03..5e82a7d 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java
@@ -29,6 +29,7 @@ import java.util.Set;
 import org.apache.accumulo.core.client.AccumuloException;
 import org.apache.accumulo.core.client.AccumuloSecurityException;
 import org.apache.accumulo.core.client.MultiTableBatchWriter;
+import org.apache.accumulo.core.client.MutationsRejectedException;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.security.ColumnVisibility;
@@ -236,6 +237,11 @@ public class WikipediaPartitionedMapper extends Mapper<Text,Article,Text,Mutatio
     wikiIndexOutput.flush();
     wikiMetadataOutput.flush();
     wikiReverseIndexOutput.flush();
+    try {
+      mtbw.close();
+    } catch (MutationsRejectedException e) {
+      throw new RuntimeException(e);
+    }
   }
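
The same close-or-lose-data rule applies to any client writing through a MultiTableBatchWriter: flushing the per-table writers pushes queued mutations, but only close() guarantees delivery and surfaces rejected mutations. A minimal sketch under the 1.4-era API, where createMultiTableBatchWriter takes max memory in bytes, max latency in milliseconds, and a thread count; the table name and sizing here are hypothetical:

    import org.apache.accumulo.core.client.BatchWriter;
    import org.apache.accumulo.core.client.Connector;
    import org.apache.accumulo.core.client.MultiTableBatchWriter;
    import org.apache.accumulo.core.data.Mutation;

    public class MtbwCloseSketch {
      public static void writeAndClose(Connector connector, Mutation m) throws Exception {
        MultiTableBatchWriter mtbw = connector.createMultiTableBatchWriter(10000000L, 1000L, 4);
        try {
          BatchWriter writer = mtbw.getBatchWriter("wikiIndex"); // hypothetical table
          writer.addMutation(m);
        } finally {
          mtbw.close(); // throws MutationsRejectedException if queued mutations failed
        }
      }
    }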
 
 


[50/50] [abbrv] git commit: Merge remote-tracking branch 'origin/1.4.5-SNAPSHOT' into 1.5.0

Posted by uj...@apache.org.
Merge remote-tracking branch 'origin/1.4.5-SNAPSHOT' into 1.5.0

Conflicts:
	ingest/bin/ingest_parallel.sh
	ingest/pom.xml
	pom.xml
	query/pom.xml


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/1990979f
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/1990979f
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/1990979f

Branch: refs/heads/master
Commit: 1990979f69c3dc571628e3ef222257fed0d30c2e
Parents: 1d5c80b 73e321e
Author: Bill Slacum <bi...@koverse.com>
Authored: Wed Mar 19 12:01:06 2014 -0400
Committer: Bill Slacum <bi...@koverse.com>
Committed: Wed Mar 19 12:01:06 2014 -0400

----------------------------------------------------------------------
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1990979f/pom.xml
----------------------------------------------------------------------
diff --cc pom.xml
index 894a132,6be16f0..c62609a
--- a/pom.xml
+++ b/pom.xml
@@@ -189,14 -63,14 +189,14 @@@
          <executions>
            <execution>
              <id>enforce-mvn</id>
 +            <goals>
 +              <goal>enforce</goal>
 +            </goals>
              <configuration>
                <rules>
-                 <DependencyConvergence />
+                 <DependencyConvergence/>
                </rules>
              </configuration>
 -            <goals>
 -              <goal>enforce</goal>
 -            </goals>
            </execution>
          </executions>
        </plugin>


[30/50] [abbrv] git commit: ACCUMULO-1168 Merge back change to CHANGES from 1.4.3rc2 tag

Posted by uj...@apache.org.
ACCUMULO-1168 Merge back change to CHANGES from 1.4.3rc2 tag


git-svn-id: https://svn.apache.org/repos/asf/accumulo/branches/1.4@1457375 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/5a855a87
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/5a855a87
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/5a855a87

Branch: refs/heads/master
Commit: 5a855a8787ade93c913f78c80e4c3ee598173922
Parents: 06c5249 9986235
Author: Josh Elser <el...@apache.org>
Authored: Sun Mar 17 05:11:28 2013 +0000
Committer: Josh Elser <el...@apache.org>
Committed: Sun Mar 17 05:11:28 2013 +0000

----------------------------------------------------------------------
 ingest/bin/ingest.sh          | 2 +-
 ingest/bin/ingest_parallel.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/5a855a87/ingest/bin/ingest.sh
----------------------------------------------------------------------
diff --cc ingest/bin/ingest.sh
index 8dca09c,54ed26f..3eb5df4
--- a/ingest/bin/ingest.sh
+++ b/ingest/bin/ingest.sh
@@@ -38,7 -38,7 +38,7 @@@ LIBJARS=`echo $CLASSPATH | sed 's/^://
  #
  # Map/Reduce job
  #
- JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.3-SNAPSHOT.jar
 -JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.3.jar
++JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.4-SNAPSHOT.jar
  CONF=$SCRIPT_DIR/../conf/wikipedia.xml
  HDFS_DATA_DIR=$1
  export HADOOP_CLASSPATH=$CLASSPATH

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/5a855a87/ingest/bin/ingest_parallel.sh
----------------------------------------------------------------------
diff --cc ingest/bin/ingest_parallel.sh
index 2f2bb5b,0a31926..c2ef4b3
--- a/ingest/bin/ingest_parallel.sh
+++ b/ingest/bin/ingest_parallel.sh
@@@ -38,7 -38,7 +38,7 @@@ LIBJARS=`echo $CLASSPATH | sed 's/^://
  #
  # Map/Reduce job
  #
- JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.3-SNAPSHOT.jar
 -JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.3.jar
++JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.4-SNAPSHOT.jar
  CONF=$SCRIPT_DIR/../conf/wikipedia.xml
  HDFS_DATA_DIR=$1
  export HADOOP_CLASSPATH=$CLASSPATH


[07/50] [abbrv] git commit: ACCUMULO-381 added a bulk ingest option for wikisearch ingest

Posted by uj...@apache.org.
ACCUMULO-381 added a bulk ingest option for wikisearch ingest

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1243506 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/b4f30879
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/b4f30879
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/b4f30879

Branch: refs/heads/master
Commit: b4f30879d903e29a240680f33db7b716e9ac7145
Parents: 72b7221
Author: Adam Fuchs <af...@apache.org>
Authored: Mon Feb 13 13:49:12 2012 +0000
Committer: Adam Fuchs <af...@apache.org>
Committed: Mon Feb 13 13:49:12 2012 +0000

----------------------------------------------------------------------
 ingest/conf/wikipedia_parallel.xml.example      |  16 +++
 .../ingest/WikipediaConfiguration.java          |  22 +++-
 .../ingest/WikipediaPartitionedIngester.java    |  56 ++++++--
 .../output/BufferingRFileRecordWriter.java      | 129 +++++++++++++++++++
 .../output/SortingRFileOutputFormat.java        | 103 +++++++++++++++
 5 files changed, 314 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/b4f30879/ingest/conf/wikipedia_parallel.xml.example
----------------------------------------------------------------------
diff --git a/ingest/conf/wikipedia_parallel.xml.example b/ingest/conf/wikipedia_parallel.xml.example
index cf20f01..53220f0 100644
--- a/ingest/conf/wikipedia_parallel.xml.example
+++ b/ingest/conf/wikipedia_parallel.xml.example
@@ -56,4 +56,20 @@
     <name>wikipedia.run.ingest</name>
     <value><!--whether to run the ingest step --></value>
   </property>
+  <property>
+    <name>wikipedia.bulk.ingest</name>
+    <value><!--whether to use bulk ingest vice streaming ingest --></value>
+  </property>
+  <property>
+    <name>wikipedia.bulk.ingest.dir</name>
+    <value><!--the directory to store rfiles for bulk ingest --></value>
+  </property>
+  <property>
+    <name>wikipedia.bulk.ingest.failure.dir</name>
+    <value><!--the directory to store failed rfiles after bulk ingest --></value>
+  </property>
+  <property>
+    <name>wikipedia.bulk.ingest.buffer.size</name>
+    <value><!--the amount of memory to use for buffering and sorting key/value pairs in each mapper before writing rfiles --></value>
+  </property>
 </configuration>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/b4f30879/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java
index 5a0aad4..a84d90c 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java
@@ -52,6 +52,10 @@ public class WikipediaConfiguration {
   
   public final static String RUN_PARTITIONER = "wikipedia.run.partitioner";
   public final static String RUN_INGEST = "wikipedia.run.ingest";
+  public final static String BULK_INGEST = "wikipedia.bulk.ingest";
+  public final static String BULK_INGEST_DIR = "wikipedia.bulk.ingest.dir";
+  public final static String BULK_INGEST_FAILURE_DIR = "wikipedia.bulk.ingest.failure.dir";
+  public final static String BULK_INGEST_BUFFER_SIZE = "wikipedia.bulk.ingest.buffer.size";
   
   
   public static String getUser(Configuration conf) {
@@ -134,6 +138,22 @@ public class WikipediaConfiguration {
     return conf.getBoolean(RUN_INGEST, true);
   }
 
+  public static boolean bulkIngest(Configuration conf) {
+    return conf.getBoolean(BULK_INGEST, true);
+  }
+
+  public static String bulkIngestDir(Configuration conf) {
+    return conf.get(BULK_INGEST_DIR);
+  }
+
+  public static String bulkIngestFailureDir(Configuration conf) {
+    return conf.get(BULK_INGEST_FAILURE_DIR);
+  }
+  
+  public static long bulkIngestBufferSize(Configuration conf) {
+    return conf.getLong(BULK_INGEST_BUFFER_SIZE,1l<<28);
+  }
+
   /**
    * Helper method to get properties from Hadoop configuration
    * 
@@ -169,5 +189,5 @@ public class WikipediaConfiguration {
       throw new IllegalArgumentException(resultClass.getSimpleName() + " is unhandled.");
     
   }
-  
+
 }
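
The new getters make the bulk-ingest knobs readable anywhere a Hadoop Configuration is at hand. A minimal sketch of reading them back (the resource name is hypothetical; the defaults are the ones visible in the diff above):

    import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;

    public class BulkConfigSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.addResource(new Path("wikipedia_parallel.xml")); // hypothetical resource
        boolean bulk = WikipediaConfiguration.bulkIngest(conf);             // defaults to true
        String dir = WikipediaConfiguration.bulkIngestDir(conf);            // wikipedia.bulk.ingest.dir
        String failDir = WikipediaConfiguration.bulkIngestFailureDir(conf); // wikipedia.bulk.ingest.failure.dir
        long bufBytes = WikipediaConfiguration.bulkIngestBufferSize(conf);  // defaults to 1l << 28 (256 MB)
        System.out.println(bulk + " " + dir + " " + failDir + " " + bufBytes);
      }
    }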

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/b4f30879/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
index 5571290..ca9af6a 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
@@ -42,6 +42,7 @@ import org.apache.accumulo.core.iterators.user.SummingCombiner;
 import org.apache.accumulo.examples.wikisearch.ingest.ArticleExtractor.Article;
 import org.apache.accumulo.examples.wikisearch.iterator.GlobalIndexUidCombiner;
 import org.apache.accumulo.examples.wikisearch.iterator.TextIndexCombiner;
+import org.apache.accumulo.examples.wikisearch.output.SortingRFileOutputFormat;
 import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -53,7 +54,6 @@ import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
 import org.apache.hadoop.util.Tool;
@@ -140,7 +140,13 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
         return result;
     }
     if(WikipediaConfiguration.runIngest(conf))
-      return runIngestJob();
+    {
+      int result = runIngestJob();
+      if(result != 0)
+        return result;
+      if(WikipediaConfiguration.bulkIngest(conf))
+        return loadBulkFiles();
+    }
     return 0;
   }
   
@@ -195,11 +201,6 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
     
     String tablename = WikipediaConfiguration.getTableName(ingestConf);
     
-    String zookeepers = WikipediaConfiguration.getZookeepers(ingestConf);
-    String instanceName = WikipediaConfiguration.getInstanceName(ingestConf);
-    
-    String user = WikipediaConfiguration.getUser(ingestConf);
-    byte[] password = WikipediaConfiguration.getPassword(ingestConf);
     Connector connector = WikipediaConfiguration.getConnector(ingestConf);
     
     TableOperations tops = connector.tableOperations();
@@ -217,13 +218,47 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
     // setup output format
     ingestJob.setMapOutputKeyClass(Text.class);
     ingestJob.setMapOutputValueClass(Mutation.class);
-    ingestJob.setOutputFormatClass(AccumuloOutputFormat.class);
-    AccumuloOutputFormat.setOutputInfo(ingestJob.getConfiguration(), user, password, true, tablename);
-    AccumuloOutputFormat.setZooKeeperInstance(ingestJob.getConfiguration(), instanceName, zookeepers);
+    
+    if(WikipediaConfiguration.bulkIngest(ingestConf))
+    {
+      ingestJob.setOutputFormatClass(AccumuloOutputFormat.class);
+      String zookeepers = WikipediaConfiguration.getZookeepers(ingestConf);
+      String instanceName = WikipediaConfiguration.getInstanceName(ingestConf);
+      String user = WikipediaConfiguration.getUser(ingestConf);
+      byte[] password = WikipediaConfiguration.getPassword(ingestConf);
+      AccumuloOutputFormat.setOutputInfo(ingestJob.getConfiguration(), user, password, true, tablename);
+      AccumuloOutputFormat.setZooKeeperInstance(ingestJob.getConfiguration(), instanceName, zookeepers);
+    } else {
+      ingestJob.setOutputFormatClass(SortingRFileOutputFormat.class);
+      SortingRFileOutputFormat.setMaxBufferSize(ingestConf, WikipediaConfiguration.bulkIngestBufferSize(ingestConf));
+      SortingRFileOutputFormat.setPathName(ingestConf, WikipediaConfiguration.bulkIngestDir(ingestConf));
+    }
     
     return ingestJob.waitForCompletion(true) ? 0 : 1;
   }
   
+  public int loadBulkFiles() throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException
+  {
+    Configuration conf = getConf();
+
+    Connector connector = WikipediaConfiguration.getConnector(conf);
+    
+    FileSystem fs = FileSystem.get(conf);
+    String directory = WikipediaConfiguration.bulkIngestDir(conf);
+    
+    String failureDirectory = WikipediaConfiguration.bulkIngestFailureDir(conf);
+    
+    for(FileStatus status: fs.listStatus(new Path(directory)))
+    {
+      if(status.isDir() == false)
+        continue;
+      Path dir = status.getPath();
+      connector.tableOperations().importDirectory(dir.getName(), dir.toString(), failureDirectory+"/"+dir.getName(), true);
+    }
+    
+    return 0;
+  }
+  
   public final static PathFilter partFilter = new PathFilter() {
     @Override
     public boolean accept(Path path) {
@@ -241,7 +276,6 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
 
   protected void configureIngestJob(Job job) {
     job.setJarByClass(WikipediaPartitionedIngester.class);
-    job.setInputFormatClass(WikipediaInputFormat.class);
   }
   
   protected static final Pattern filePattern = Pattern.compile("([a-z_]+).*.xml(.bz2)?");

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/b4f30879/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/BufferingRFileRecordWriter.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/BufferingRFileRecordWriter.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/BufferingRFileRecordWriter.java
new file mode 100644
index 0000000..a7e7dcf
--- /dev/null
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/BufferingRFileRecordWriter.java
@@ -0,0 +1,129 @@
+package org.apache.accumulo.examples.wikisearch.output;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import org.apache.accumulo.core.data.ColumnUpdate;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.file.FileSKVWriter;
+import org.apache.accumulo.core.file.rfile.RFileOperations;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+final class BufferingRFileRecordWriter extends RecordWriter<Text,Mutation> {
+  private final long maxSize;
+  private final AccumuloConfiguration acuconf;
+  private final Configuration conf;
+  private final String filenamePrefix;
+  private final String taskID;
+  private final FileSystem fs;
+  private int fileCount = 0;
+  private long size;
+  
+  private Map<Text,TreeMap<Key,Value>> buffers = new HashMap<Text,TreeMap<Key,Value>>();
+  private Map<Text,Long> bufferSizes = new HashMap<Text,Long>();
+
+  private TreeMap<Key,Value> getBuffer(Text tablename)
+  {
+    TreeMap<Key,Value> buffer = buffers.get(tablename);
+    if(buffer == null)
+    {
+      buffer = new TreeMap<Key,Value>();
+      buffers.put(tablename, buffer);
+      bufferSizes.put(tablename, 0l);
+    }
+    return buffer;
+  }
+  
+  private Text getLargestTablename()
+  {
+    long max = 0;
+    Text table = null;
+    for(Entry<Text,Long> e:bufferSizes.entrySet())
+    {
+      if(e.getValue() > max)
+      {
+        max = e.getValue();
+        table = e.getKey();
+      }
+    }
+    return table;
+  }
+  
+  private void flushLargestTable() throws IOException
+  {
+    Text tablename = getLargestTablename();
+    if(tablename == null)
+      return;
+    long bufferSize = bufferSizes.get(tablename);
+    TreeMap<Key,Value> buffer = buffers.get(tablename);
+    if (buffer.size() == 0)
+      return;
+    
+    // TODO fix the filename
+    String file = filenamePrefix + "/" + tablename + "/" + taskID + "_" + (fileCount++) + ".rf";
+    FileSKVWriter writer = RFileOperations.getInstance().openWriter(file, fs, conf, acuconf);
+    
+    // forget locality groups for now, just write everything to the default
+    writer.startDefaultLocalityGroup();
+    
+    for (Entry<Key,Value> e : buffer.entrySet()) {
+      writer.append(e.getKey(), e.getValue());
+    }
+    
+    writer.close();
+    
+    size -= bufferSize;
+    buffer.clear();
+    bufferSizes.put(tablename, 0l);
+  }
+  
+  BufferingRFileRecordWriter(long maxSize, AccumuloConfiguration acuconf, Configuration conf, String filenamePrefix, String taskID, FileSystem fs) {
+    this.maxSize = maxSize;
+    this.acuconf = acuconf;
+    this.conf = conf;
+    this.filenamePrefix = filenamePrefix;
+    this.taskID = taskID;
+    this.fs = fs;
+  }
+  
+  @Override
+  public void close(TaskAttemptContext arg0) throws IOException, InterruptedException {
+    while(size > 0)
+      flushLargestTable();
+  }
+  
+  @Override
+  public void write(Text table, Mutation mutation) throws IOException, InterruptedException {
+    TreeMap<Key,Value> buffer = getBuffer(table);
+    int mutationSize = 0;
+    for(ColumnUpdate update: mutation.getUpdates())
+    {
+      Key k = new Key(mutation.getRow(),update.getColumnFamily(),update.getColumnQualifier(),update.getColumnVisibility(),update.getTimestamp(),update.isDeleted());
+      Value v = new Value(update.getValue());
+      mutationSize += k.getSize();
+      mutationSize += v.getSize();
+      buffer.put(k, v);
+    }
+    size += mutationSize;
+    long bufferSize = bufferSizes.get(table);
+    bufferSize += mutationSize;
+    bufferSizes.put(table, bufferSize);
+    
+    // TODO add object overhead size
+    
+    while (size >= maxSize) {
+      flushLargestTable();
+    }
+  }
+  
+}
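
The writer's memory policy is simple: accumulate key/value pairs per destination table, track the grand total, and whenever the total reaches maxSize flush whichever table currently buffers the most bytes. A toy illustration of just that policy, detached from the Accumulo types (table names and sizes are made up):

    import java.util.HashMap;
    import java.util.Map;

    public class FlushPolicySketch {
      public static void main(String[] args) {
        long maxSize = 100L; // toy threshold, analogous to wikipedia.bulk.ingest.buffer.size
        Map<String,Long> bufferSizes = new HashMap<String,Long>();
        bufferSizes.put("wikiIndex", 80L);
        bufferSizes.put("wikiReverseIndex", 30L);
        long total = 110L; // 80 + 30, now over the threshold
        while (total >= maxSize) { // same loop shape as BufferingRFileRecordWriter.write()
          String largest = null;
          long max = 0;
          for (Map.Entry<String,Long> e : bufferSizes.entrySet()) {
            if (e.getValue() > max) { max = e.getValue(); largest = e.getKey(); }
          }
          total -= bufferSizes.put(largest, 0L); // "flush" the biggest buffer
          System.out.println("flushed " + largest); // prints: flushed wikiIndex
        }
      }
    }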

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/b4f30879/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java
new file mode 100644
index 0000000..f556287
--- /dev/null
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java
@@ -0,0 +1,103 @@
+package org.apache.accumulo.examples.wikisearch.output;
+
+import java.io.IOException;
+
+import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.Text;
+
+public class SortingRFileOutputFormat extends OutputFormat<Text,Mutation> {
+  
+  public static final String PATH_NAME = "sortingrfileoutputformat.path";
+  public static final String MAX_BUFFER_SIZE = "sortingrfileoutputformat.max.buffer.size";
+  
+  public static void setPathName(Configuration conf, String path) {
+    conf.set(PATH_NAME, path);
+  }
+  
+  public static String getPathName(Configuration conf) {
+    return conf.get(PATH_NAME);
+  }
+  
+  public static void setMaxBufferSize(Configuration conf, long maxBufferSize) {
+    conf.setLong(MAX_BUFFER_SIZE, maxBufferSize);
+  }
+  
+  public static long getMaxBufferSize(Configuration conf) {
+    return conf.getLong(MAX_BUFFER_SIZE, -1);
+  }
+  
+  @Override
+  public void checkOutputSpecs(JobContext job) throws IOException, InterruptedException {
+    // TODO make sure the path is writable?
+    // TODO make sure the max buffer size is set and is reasonable
+  }
+  
+  @Override
+  public OutputCommitter getOutputCommitter(TaskAttemptContext arg0) throws IOException, InterruptedException {
+    return new OutputCommitter() {
+      
+      @Override
+      public void setupTask(TaskAttemptContext arg0) throws IOException {
+        // TODO Auto-generated method stub
+        
+      }
+      
+      @Override
+      public void setupJob(JobContext arg0) throws IOException {
+        // TODO Auto-generated method stub
+        
+      }
+      
+      @Override
+      public boolean needsTaskCommit(TaskAttemptContext arg0) throws IOException {
+        // TODO Auto-generated method stub
+        return false;
+      }
+      
+      @Override
+      public void commitTask(TaskAttemptContext arg0) throws IOException {
+        // TODO Auto-generated method stub
+        
+      }
+      
+      @Override
+      public void cleanupJob(JobContext arg0) throws IOException {
+        // TODO Auto-generated method stub
+        
+      }
+      
+      @Override
+      public void abortTask(TaskAttemptContext arg0) throws IOException {
+        // TODO Auto-generated method stub
+        
+      }
+    };
+  }
+  
+  @Override
+  public RecordWriter<Text,Mutation> getRecordWriter(TaskAttemptContext attempt) throws IOException, InterruptedException {
+    
+    // grab the configuration
+    final Configuration conf = attempt.getConfiguration();
+    // create a filename
+    final String filenamePrefix = getPathName(conf);
+    final String taskID = attempt.getTaskAttemptID().toString();
+    // grab the max size
+    final long maxSize = getMaxBufferSize(conf);
+    // grab the FileSystem
+    final FileSystem fs = FileSystem.get(conf);
+    // create a default AccumuloConfiguration
+    final AccumuloConfiguration acuconf = AccumuloConfiguration.getDefaultConfiguration();
+    
+    return new BufferingRFileRecordWriter(maxSize, acuconf, conf, filenamePrefix, taskID, fs);
+  }
+  
+}
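
Once the job has written rfiles under one subdirectory per table, loadBulkFiles() hands each subdirectory to importDirectory. The equivalent one-table call, for reference (paths and the table name are hypothetical; depending on the version, the failure directory may need to exist and be empty before the import):

    import org.apache.accumulo.core.client.Connector;
    import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
    import org.apache.hadoop.conf.Configuration;

    public class BulkLoadSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Connector connector = WikipediaConfiguration.getConnector(conf);
        connector.tableOperations().importDirectory(
            "wikiIndex",                           // table, taken from the subdirectory name above
            "/wikisearch/bulk/wikiIndex",          // directory of rfiles from the ingest job
            "/wikisearch/bulk-failures/wikiIndex", // where files that fail to import land
            true);                                 // setTime
      }
    }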


[03/50] [abbrv] git commit: ACCUMULO-375 tweaked split order and staticified factory code for performance

Posted by uj...@apache.org.
ACCUMULO-375 tweaked split order and staticified factory code for performance

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1241957 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/fa359318
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/fa359318
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/fa359318

Branch: refs/heads/master
Commit: fa35931803c83151406739014446cb954382d50f
Parents: dfe26ba
Author: Adam Fuchs <af...@apache.org>
Authored: Wed Feb 8 16:10:29 2012 +0000
Committer: Adam Fuchs <af...@apache.org>
Committed: Wed Feb 8 16:10:29 2012 +0000

----------------------------------------------------------------------
 .../examples/wikisearch/ingest/ArticleExtractor.java        | 9 +++++++--
 .../examples/wikisearch/ingest/WikipediaInputFormat.java    | 6 +++---
 2 files changed, 10 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/fa359318/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/ArticleExtractor.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/ArticleExtractor.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/ArticleExtractor.java
index a3dcf8d..0699cfa 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/ArticleExtractor.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/ArticleExtractor.java
@@ -124,9 +124,14 @@ public class ArticleExtractor {
   
   public ArticleExtractor() {}
   
-  public Article extract(Reader reader) {
-    XMLInputFactory xmlif = XMLInputFactory.newInstance();
+  private static XMLInputFactory xmlif = XMLInputFactory.newInstance();
+
+  static
+  {
     xmlif.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE);
+  }
+  
+  public Article extract(Reader reader) {
     
     XMLStreamReader xmlr = null;
     

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/fa359318/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java
index dd2eeb9..731d02c 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java
@@ -116,11 +116,11 @@ public class WikipediaInputFormat extends TextInputFormat {
     
     int numGroups = WikipediaConfiguration.getNumGroups(job.getConfiguration());
 
-    for(InputSplit split:superSplits)
+    for(int group = 0; group < numGroups; group++)
     {
-      FileSplit fileSplit = (FileSplit)split;
-      for(int group = 0; group < numGroups; group++)
+      for(InputSplit split:superSplits)
       {
+        FileSplit fileSplit = (FileSplit)split;
         splits.add(new WikipediaInputSplit(fileSplit,group));
       }
     }


[22/50] [abbrv] git commit: ACCUMULO-665: merge to 1.4 branch

Posted by uj...@apache.org.
ACCUMULO-665: merge to 1.4 branch

git-svn-id: https://svn.apache.org/repos/asf/accumulo/branches/1.4@1356403 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/86aeddab
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/86aeddab
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/86aeddab

Branch: refs/heads/master
Commit: 86aeddab6414ea4876b39bdde5d229ce1031d3a3
Parents: 5c9f45a
Author: Eric C. Newton <ec...@apache.org>
Authored: Mon Jul 2 18:53:15 2012 +0000
Committer: Eric C. Newton <ec...@apache.org>
Committed: Mon Jul 2 18:53:15 2012 +0000

----------------------------------------------------------------------
 .../examples/wikisearch/iterator/AndIterator.java | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/86aeddab/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
index 5ace7c8..b469625 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
@@ -17,9 +17,11 @@
 package org.apache.accumulo.examples.wikisearch.iterator;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Map;
 
+import org.apache.accumulo.core.data.ArrayByteSequence;
 import org.apache.accumulo.core.data.ByteSequence;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.PartialKey;
@@ -86,7 +88,8 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
   }
   
   /*
-   * | Row | Column Family | Column Qualifier | Value | {RowID} | {dataLocation} | {term}\0{dataType}\0{UID} | Empty
+   * | Row     | Column Family  | Column Qualifier          | Value 
+   * | {RowID} | {dataLocation} | {term}\0{dataType}\0{UID} | Empty
    */
   protected Text getPartition(Key key) {
     return key.getRow();
@@ -778,21 +781,24 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
     // seek each of the sources to the right column family within the row given by key
     for (int i = 0; i < sourcesCount; i++) {
       Key sourceKey;
+      Text dataLocation = (sources[i].dataLocation == null) ? nullText : sources[i].dataLocation;
+      Collection<ByteSequence> columnFamilies = new ArrayList<ByteSequence>();
+      columnFamilies.add(new ArrayByteSequence(dataLocation.getBytes(), 0, dataLocation.getLength()));
       if (range.getStartKey() != null) {
         // Build a key with the DocID if one is given
-        if (range.getStartKey().getColumnFamily() != null) {
-          sourceKey = buildKey(getPartition(range.getStartKey()), (sources[i].dataLocation == null) ? nullText : sources[i].dataLocation,
+		if (range.getStartKey().getColumnFamily() != null) {
+          sourceKey = buildKey(getPartition(range.getStartKey()), dataLocation,
               (sources[i].term == null) ? nullText : new Text(sources[i].term + "\0" + range.getStartKey().getColumnFamily()));
         } // Build a key with just the term.
         else {
-          sourceKey = buildKey(getPartition(range.getStartKey()), (sources[i].dataLocation == null) ? nullText : sources[i].dataLocation,
+          sourceKey = buildKey(getPartition(range.getStartKey()), dataLocation,
               (sources[i].term == null) ? nullText : sources[i].term);
         }
         if (!range.isStartKeyInclusive())
           sourceKey = sourceKey.followingKey(PartialKey.ROW_COLFAM_COLQUAL);
-        sources[i].iter.seek(new Range(sourceKey, true, null, false), seekColumnFamilies, inclusive);
+        sources[i].iter.seek(new Range(sourceKey, true, null, false), columnFamilies, inclusive);
       } else {
-        sources[i].iter.seek(range, seekColumnFamilies, inclusive);
+    	sources[i].iter.seek(range, columnFamilies, inclusive);
       }
     }
     


[21/50] [abbrv] git commit: ACCUMULO-485 configure the example log level at info to simplify instructions

Posted by uj...@apache.org.
ACCUMULO-485 configure the example log level at info to simplify instructions

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1303845 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/5c9f45a1
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/5c9f45a1
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/5c9f45a1

Branch: refs/heads/master
Commit: 5c9f45a1a8b3c42f7a4a5ed8ada92ec64d2dc5d4
Parents: eb86eae
Author: Eric C. Newton <ec...@apache.org>
Authored: Thu Mar 22 15:40:40 2012 +0000
Committer: Eric C. Newton <ec...@apache.org>
Committed: Thu Mar 22 15:40:40 2012 +0000

----------------------------------------------------------------------
 README | 8 --------
 1 file changed, 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/5c9f45a1/README
----------------------------------------------------------------------
diff --git a/README b/README
index 43077a7..041490f 100644
--- a/README
+++ b/README
@@ -66,11 +66,3 @@
 	There are two parameters to the REST service, query and auths. The query parameter is the same string that you would type
 	into the search box at ui.jsp, and the auths parameter is a comma-separated list of wikis that you want to search (i.e.
 	enwiki,frwiki,dewiki, etc. Or you can use all) 
-	
-	10. Optional. Add the following line to the $ACCUMULO_HOME/conf/log4j.properties file to turn off debug messages in the specialized 
-	iterators, which will dramatically increase performance:
-	
-	log4j.logger.org.apache.accumulo.examples.wikisearch.iterator=INFO,A1
-	
-	This needs to be propagated to all the tablet server nodes, and accumulo needs to be restarted.
-	


[05/50] [abbrv] git commit: ACCUMULO-381 fixed missing dependency

Posted by uj...@apache.org.
ACCUMULO-381 fixed missing dependency

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1242129 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/be69711d
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/be69711d
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/be69711d

Branch: refs/heads/master
Commit: be69711d6d7b1fdd4f0a7adc388ede9f66bdd977
Parents: 842696e
Author: Adam Fuchs <af...@apache.org>
Authored: Wed Feb 8 22:00:13 2012 +0000
Committer: Adam Fuchs <af...@apache.org>
Committed: Wed Feb 8 22:00:13 2012 +0000

----------------------------------------------------------------------
 ingest/pom.xml | 4 ++++
 1 file changed, 4 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/be69711d/ingest/pom.xml
----------------------------------------------------------------------
diff --git a/ingest/pom.xml b/ingest/pom.xml
index ac123c7..d8d26f2 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -86,6 +86,10 @@
       <artifactId>libthrift</artifactId>
       <scope>runtime</scope>
     </dependency>
+    <dependency>
+      <groupId>commons-codec</groupId>
+      <artifactId>commons-codec</artifactId>
+    </dependency>
   </dependencies>
 
   <build>


[13/50] [abbrv] git commit: ACCUMULO-375 made min input split size configurable

Posted by uj...@apache.org.
ACCUMULO-375 made min input split size configurable

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1245684 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/e24faaf9
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/e24faaf9
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/e24faaf9

Branch: refs/heads/master
Commit: e24faaf99b05f5c1d68bf07444043ef9bf5ba048
Parents: 2e366aa
Author: Adam Fuchs <af...@apache.org>
Authored: Fri Feb 17 18:00:57 2012 +0000
Committer: Adam Fuchs <af...@apache.org>
Committed: Fri Feb 17 18:00:57 2012 +0000

----------------------------------------------------------------------
 .../examples/wikisearch/ingest/WikipediaConfiguration.java      | 5 +++++
 .../wikisearch/ingest/WikipediaPartitionedIngester.java         | 3 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e24faaf9/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java
index a84d90c..27a28a1 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java
@@ -56,6 +56,7 @@ public class WikipediaConfiguration {
   public final static String BULK_INGEST_DIR = "wikipedia.bulk.ingest.dir";
   public final static String BULK_INGEST_FAILURE_DIR = "wikipedia.bulk.ingest.failure.dir";
   public final static String BULK_INGEST_BUFFER_SIZE = "wikipedia.bulk.ingest.buffer.size";
+  public final static String PARTITIONED_INPUT_MIN_SPLIT_SIZE = "wikipedia.min.input.split.size";
   
   
   public static String getUser(Configuration conf) {
@@ -130,6 +131,10 @@ public class WikipediaConfiguration {
     return new Path(conf.get(PARTITIONED_ARTICLES_DIRECTORY));
   }
   
+  public static long getMinInputSplitSize(Configuration conf) {
+    return conf.getLong(PARTITIONED_INPUT_MIN_SPLIT_SIZE, 1l << 27);
+  }
+
   public static boolean runPartitioner(Configuration conf) {
     return conf.getBoolean(RUN_PARTITIONER, false);
   }

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e24faaf9/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
index bcdee43..90b8308 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
@@ -217,7 +217,8 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
     // setup input format
     ingestJob.setInputFormatClass(SequenceFileInputFormat.class);
     SequenceFileInputFormat.setInputPaths(ingestJob, WikipediaConfiguration.getPartitionedArticlesPath(ingestConf));
-    SequenceFileInputFormat.setMinInputSplitSize(ingestJob, 1l << 28);
+    // TODO make split size configurable
+    SequenceFileInputFormat.setMinInputSplitSize(ingestJob, WikipediaConfiguration.getMinInputSplitSize(ingestConf));
 
     // setup output format
     ingestJob.setMapOutputKeyClass(Text.class);
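
Tuning the new knob is a one-liner on the job configuration; the value below is just the old hardcoded floor (the default, per the getter above, is 1l << 27):

    Configuration conf = ingestJob.getConfiguration(); // assumes an existing Job named ingestJob
    conf.setLong("wikipedia.min.input.split.size", 1L << 28); // 256 MB minimum split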


[43/50] [abbrv] git commit: ACCUMULO-2017 Wikisearch should use Guava instead of Google Collections.

Posted by uj...@apache.org.
ACCUMULO-2017 Wikisearch should use Guava instead of Google Collections.

Replaces Google Collections with Google Guava because that's what Hadoop 2 uses internally and they conflict.


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/e84d8d7a
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/e84d8d7a
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/e84d8d7a

Branch: refs/heads/master
Commit: e84d8d7a798a5b8098d01a2e02a17b8d20c918f8
Parents: e158590
Author: Sean Busbey <bu...@clouderagovt.com>
Authored: Wed Dec 11 00:27:16 2013 -0600
Committer: Sean Busbey <bu...@cloudera.com>
Committed: Mon Mar 10 10:08:22 2014 -0500

----------------------------------------------------------------------
 README          | 2 +-
 README.parallel | 2 +-
 ingest/pom.xml  | 7 ++++---
 pom.xml         | 8 ++++----
 query/pom.xml   | 7 ++++---
 5 files changed, 14 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e84d8d7a/README
----------------------------------------------------------------------
diff --git a/README b/README
index 041490f..869f5e7 100644
--- a/README
+++ b/README
@@ -55,7 +55,7 @@
 		kryo*.jar
 		minlog*.jar
 		commons-jexl*.jar
-		google-collections*.jar
+		guava*.jar
 		
 	8. Copy the $JBOSS_HOME/server/default/deploy/wikisearch-query*.jar to $ACCUMULO_HOME/lib/ext.
 

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e84d8d7a/README.parallel
----------------------------------------------------------------------
diff --git a/README.parallel b/README.parallel
index 477556b..399f0f3 100644
--- a/README.parallel
+++ b/README.parallel
@@ -52,7 +52,7 @@
 		kryo*.jar
 		minlog*.jar
 		commons-jexl*.jar
-		google-collections*.jar
+		guava*.jar
 		
 	8. Copy the $JBOSS_HOME/server/default/deploy/wikisearch-query*.jar to $ACCUMULO_HOME/lib/ext.
 

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e84d8d7a/ingest/pom.xml
----------------------------------------------------------------------
diff --git a/ingest/pom.xml b/ingest/pom.xml
index 31d7110..c4e0336 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -49,8 +49,8 @@
       <artifactId>commons-lang</artifactId>
     </dependency>
     <dependency>
-      <groupId>com.google.collections</groupId>
-      <artifactId>google-collections</artifactId>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
     </dependency>
     <dependency>
       <groupId>org.apache.lucene</groupId>
@@ -103,7 +103,8 @@
             <configuration>
               <outputDirectory>lib</outputDirectory>
               <!-- just grab the non-provided runtime dependencies -->
-              <includeArtifactIds>commons-lang,google-collections,lucene-core,lucene-analyzers,lucene-wikipedia,protobuf-java,accumulo-core,hadoop-core,libthrift,cloudtrace,zookeeper,commons-codec</includeArtifactIds>
+              <!-- XXX we include guava at the same version as hadoop 2 provides so that we have it on hadoop 1 -->
+              <includeArtifactIds>commons-lang,guava,lucene-core,lucene-analyzers,lucene-wikipedia,protobuf-java,accumulo-core,hadoop-core,libthrift,cloudtrace,zookeeper,commons-codec</includeArtifactIds>
               <excludeTransitive>false</excludeTransitive>
             </configuration>
           </execution>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e84d8d7a/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 1ea9a2a..44996e9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -164,7 +164,7 @@
     <version.lucene-analyzers>3.0.2</version.lucene-analyzers>
     <version.lucene-wikipedia>3.0.2</version.lucene-wikipedia>
     <version.protobuf>2.3.0</version.protobuf>
-    <version.googlecollections>1.0</version.googlecollections>
+    <version.guava>11.0.2</version.guava>
     <version.libthrift>0.6.1</version.libthrift>
     <version.zookeeper>3.3.1</version.zookeeper>
     <version.minlog>1.2</version.minlog>
@@ -203,9 +203,9 @@
         <version>${version.accumulo}</version>
       </dependency>
       <dependency>
-        <groupId>com.google.collections</groupId>
-        <artifactId>google-collections</artifactId>
-        <version>${version.googlecollections}</version>
+        <groupId>com.google.guava</groupId>
+        <artifactId>guava</artifactId>
+        <version>${version.guava}</version>
       </dependency>
       <dependency>
         <groupId>com.googlecode</groupId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e84d8d7a/query/pom.xml
----------------------------------------------------------------------
diff --git a/query/pom.xml b/query/pom.xml
index 8c6e6d3..d2f75ea 100644
--- a/query/pom.xml
+++ b/query/pom.xml
@@ -62,8 +62,8 @@
       <artifactId>commons-jexl</artifactId>
     </dependency>
     <dependency>
-      <groupId>com.google.collections</groupId>
-      <artifactId>google-collections</artifactId>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
     </dependency>
     <dependency>
       <groupId>com.googlecode</groupId>
@@ -106,7 +106,8 @@
             <configuration>
               <outputDirectory>lib</outputDirectory>
               <!-- just grab the non-provided runtime dependencies -->
-              <includeArtifactIds>commons-lang,commons-codec,protobuf-java,libthrift,zookeeper,hadoop-core,commons-jexl,google-collections,kryo,asm,minlog,reflectasm,wikisearch-ingest,accumulo-core,cloudtrace</includeArtifactIds>
+              <!-- XXX we include guava at the same version as hadoop 2 provides so that we have it on hadoop 1 -->
+              <includeArtifactIds>commons-lang,commons-codec,protobuf-java,libthrift,zookeeper,hadoop-core,commons-jexl,guava,kryo,asm,minlog,reflectasm,wikisearch-ingest,accumulo-core,cloudtrace</includeArtifactIds>
               <excludeTransitive>true</excludeTransitive>
             </configuration>
           </execution>


[06/50] [abbrv] git commit: ACCUMULO-381 fixed missing runtime dependency

Posted by uj...@apache.org.
ACCUMULO-381 fixed missing runtime dependency

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1242426 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/72b7221f
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/72b7221f
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/72b7221f

Branch: refs/heads/master
Commit: 72b7221fb47ff54a06b94c584526f4f94c3788d9
Parents: be69711
Author: Adam Fuchs <af...@apache.org>
Authored: Thu Feb 9 18:09:36 2012 +0000
Committer: Adam Fuchs <af...@apache.org>
Committed: Thu Feb 9 18:09:36 2012 +0000

----------------------------------------------------------------------
 ingest/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/72b7221f/ingest/pom.xml
----------------------------------------------------------------------
diff --git a/ingest/pom.xml b/ingest/pom.xml
index d8d26f2..743a4e8 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -107,7 +107,7 @@
             <configuration>
               <outputDirectory>lib</outputDirectory>
               <!-- just grab the non-provided runtime dependencies -->
-              <includeArtifactIds>commons-lang,google-collections,lucene-core,lucene-analyzers,lucene-wikipedia,protobuf-java,accumulo-core,hadoop-core,libthrift,cloudtrace,zookeeper</includeArtifactIds>
+              <includeArtifactIds>commons-lang,google-collections,lucene-core,lucene-analyzers,lucene-wikipedia,protobuf-java,accumulo-core,hadoop-core,libthrift,cloudtrace,zookeeper,commons-codec</includeArtifactIds>
               <excludeTransitive>false</excludeTransitive>
             </configuration>
           </execution>


[48/50] [abbrv] ACCUMULO-2446 Wikisearch now works with Accumulo 1.5.0 on both Hadoop 1.0.4 and 2.0.4-alpha.

Posted by uj...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1d5c80be/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
----------------------------------------------------------------------
diff --git a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java b/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
deleted file mode 100644
index ac8241e..0000000
--- a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.logic;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map.Entry;
-
-import junit.framework.Assert;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.MutationsRejectedException;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.PasswordToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
-import org.apache.accumulo.examples.wikisearch.ingest.WikipediaIngester;
-import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
-import org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper;
-import org.apache.accumulo.examples.wikisearch.parser.RangeCalculator;
-import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
-import org.apache.accumulo.examples.wikisearch.sample.Document;
-import org.apache.accumulo.examples.wikisearch.sample.Field;
-import org.apache.accumulo.examples.wikisearch.sample.Results;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RawLocalFileSystem;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-import org.junit.Before;
-import org.junit.Test;
-
-public class TestQueryLogic {
-  
-  private static final String METADATA_TABLE_NAME = "wikiMetadata";
-  
-  private static final String TABLE_NAME = "wiki";
-  
-  private static final String INDEX_TABLE_NAME = "wikiIndex";
-  
-  private static final String RINDEX_TABLE_NAME = "wikiReverseIndex";
-  
-  private static final String TABLE_NAMES[] = {METADATA_TABLE_NAME, TABLE_NAME, RINDEX_TABLE_NAME, INDEX_TABLE_NAME};
-  
-  private class MockAccumuloRecordWriter extends RecordWriter<Text,Mutation> {
-    @Override
-    public void write(Text key, Mutation value) throws IOException, InterruptedException {
-      try {
-        writerMap.get(key).addMutation(value);
-      } catch (MutationsRejectedException e) {
-        throw new IOException("Error adding mutation", e);
-      }
-    }
-    
-    @Override
-    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
-      try {
-        for (BatchWriter w : writerMap.values()) {
-          w.flush();
-          w.close();
-        }
-      } catch (MutationsRejectedException e) {
-        throw new IOException("Error closing Batch Writer", e);
-      }
-    }
-    
-  }
-  
-  private Connector c = null;
-  private Configuration conf = new Configuration();
-  private HashMap<Text,BatchWriter> writerMap = new HashMap<Text,BatchWriter>();
-  private QueryLogic table = null;
-  
-  @Before
-  public void setup() throws Exception {
-    
-    Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.DEBUG);
-    Logger.getLogger(QueryLogic.class).setLevel(Level.DEBUG);
-    Logger.getLogger(RangeCalculator.class).setLevel(Level.DEBUG);
-    
-    conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
-    conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
-    conf.set(WikipediaConfiguration.TABLE_NAME, TABLE_NAME);
-    conf.set(WikipediaConfiguration.NUM_PARTITIONS, "1");
-    conf.set(WikipediaConfiguration.NUM_GROUPS, "1");
-    
-    MockInstance i = new MockInstance();
-    c = i.getConnector("root", new PasswordToken(""));
-    WikipediaIngester.createTables(c.tableOperations(), TABLE_NAME, false);
-    for (String table : TABLE_NAMES) {
-      writerMap.put(new Text(table), c.createBatchWriter(table, 1000L, 1000L, 1));
-    }
-    
-    TaskAttemptID id = new TaskAttemptID();
-    TaskAttemptContext context = new TaskAttemptContext(conf, id);
-    
-    RawLocalFileSystem fs = new RawLocalFileSystem();
-    fs.setConf(conf);
-    
-    URL url = ClassLoader.getSystemResource("enwiki-20110901-001.xml");
-    Assert.assertNotNull(url);
-    File data = new File(url.toURI());
-    Path tmpFile = new Path(data.getAbsolutePath());
-    
-    // Setup the Mapper
-    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null), 0);
-    AggregatingRecordReader rr = new AggregatingRecordReader();
-    Path ocPath = new Path(tmpFile, "oc");
-    OutputCommitter oc = new FileOutputCommitter(ocPath, context);
-    fs.deleteOnExit(ocPath);
-    StandaloneStatusReporter sr = new StandaloneStatusReporter();
-    rr.initialize(split, context);
-    MockAccumuloRecordWriter rw = new MockAccumuloRecordWriter();
-    WikipediaMapper mapper = new WikipediaMapper();
-    
-    // Load data into Mock Accumulo
-    Mapper<LongWritable,Text,Text,Mutation>.Context con = mapper.new Context(conf, id, rr, rw, oc, sr, split);
-    mapper.run(con);
-    
-    // Flush and close record writers.
-    rw.close(context);
-    
-    table = new QueryLogic();
-    table.setMetadataTableName(METADATA_TABLE_NAME);
-    table.setTableName(TABLE_NAME);
-    table.setIndexTableName(INDEX_TABLE_NAME);
-    table.setReverseIndexTableName(RINDEX_TABLE_NAME);
-    table.setUseReadAheadIterator(false);
-    table.setUnevaluatedFields(Collections.singletonList("TEXT"));
-  }
-  
-  void debugQuery(String tableName) throws Exception {
-    Scanner s = c.createScanner(tableName, new Authorizations("all"));
-    Range r = new Range();
-    s.setRange(r);
-    for (Entry<Key,Value> entry : s)
-      System.out.println(entry.getKey().toString() + " " + entry.getValue().toString());
-  }
-  
-  @Test
-  public void testTitle() throws Exception {
-    Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.OFF);
-    Logger.getLogger(RangeCalculator.class).setLevel(Level.OFF);
-    List<String> auths = new ArrayList<String>();
-    auths.add("enwiki");
-    
-    Results results = table.runQuery(c, auths, "TITLE == 'asphalt' or TITLE == 'abacus' or TITLE == 'acid' or TITLE == 'acronym'", null, null, null);
-    List<Document> docs = results.getResults();
-    assertEquals(4, docs.size());
-    
-    results = table.runQuery(c, auths, "TEXT == 'abacus'", null, null, null);
-    docs = results.getResults();
-    assertEquals(1, docs.size());
-    for (Document doc : docs) {
-      System.out.println("id: " + doc.getId());
-      for (Field field : doc.getFields())
-        System.out.println(field.getFieldName() + " -> " + field.getFieldValue());
-    }
-  }
-  
-}


[31/50] [abbrv] git commit: ACCUMULO-1546 updating pom version for 1.4.4 release

Posted by uj...@apache.org.
ACCUMULO-1546 updating pom version for 1.4.4 release


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/df297196
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/df297196
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/df297196

Branch: refs/heads/master
Commit: df2971968f0413a18e65e6f8722feb36418b33f9
Parents: 5a855a8
Author: Mike Drob <md...@mdrob.com>
Authored: Wed Aug 7 16:27:07 2013 -0400
Committer: Mike Drob <md...@mdrob.com>
Committed: Wed Aug 7 16:27:07 2013 -0400

----------------------------------------------------------------------
 ingest/pom.xml    | 2 +-
 pom.xml           | 2 +-
 query-war/pom.xml | 2 +-
 query/pom.xml     | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/df297196/ingest/pom.xml
----------------------------------------------------------------------
diff --git a/ingest/pom.xml b/ingest/pom.xml
index f2a8f77..e84278d 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.4-SNAPSHOT</version>
+    <version>1.4.4</version>
     <relativePath>../</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/df297196/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 1036436..003b0fd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -20,7 +20,7 @@
   <parent>
         <artifactId>accumulo-examples</artifactId>
         <groupId>org.apache.accumulo</groupId>
-        <version>1.4.4-SNAPSHOT</version>
+        <version>1.4.4</version>
         <relativePath>../</relativePath>
   </parent>
   <artifactId>accumulo-wikisearch</artifactId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/df297196/query-war/pom.xml
----------------------------------------------------------------------
diff --git a/query-war/pom.xml b/query-war/pom.xml
index 555f995..a425f7c 100644
--- a/query-war/pom.xml
+++ b/query-war/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.4-SNAPSHOT</version>
+    <version>1.4.4</version>
   </parent>
 
   <artifactId>wikisearch-query-war</artifactId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/df297196/query/pom.xml
----------------------------------------------------------------------
diff --git a/query/pom.xml b/query/pom.xml
index 9d20269..b5ce0b0 100644
--- a/query/pom.xml
+++ b/query/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.4-SNAPSHOT</version>
+    <version>1.4.4</version>
     <relativePath>../</relativePath>
   </parent>
 


[26/50] [abbrv] git commit: ACCUMULO-819 switch the version information in the 1.4 branch

Posted by uj...@apache.org.
ACCUMULO-819 switch the version information in the 1.4 branch

git-svn-id: https://svn.apache.org/repos/asf/accumulo/branches/1.4@1399202 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/ab20ab7b
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/ab20ab7b
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/ab20ab7b

Branch: refs/heads/master
Commit: ab20ab7b17216aea2b96057cce90f6e7ec844d68
Parents: 873f98c
Author: Eric C. Newton <ec...@apache.org>
Authored: Wed Oct 17 12:35:26 2012 +0000
Committer: Eric C. Newton <ec...@apache.org>
Committed: Wed Oct 17 12:35:26 2012 +0000

----------------------------------------------------------------------
 ingest/bin/ingest.sh          | 2 +-
 ingest/bin/ingest_parallel.sh | 2 +-
 ingest/pom.xml                | 2 +-
 pom.xml                       | 2 +-
 query-war/pom.xml             | 2 +-
 query/pom.xml                 | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/ab20ab7b/ingest/bin/ingest.sh
----------------------------------------------------------------------
diff --git a/ingest/bin/ingest.sh b/ingest/bin/ingest.sh
index 16d3cf5..8dca09c 100755
--- a/ingest/bin/ingest.sh
+++ b/ingest/bin/ingest.sh
@@ -38,7 +38,7 @@ LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 #
 # Map/Reduce job
 #
-JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.2-SNAPSHOT.jar
+JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.3-SNAPSHOT.jar
 CONF=$SCRIPT_DIR/../conf/wikipedia.xml
 HDFS_DATA_DIR=$1
 export HADOOP_CLASSPATH=$CLASSPATH

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/ab20ab7b/ingest/bin/ingest_parallel.sh
----------------------------------------------------------------------
diff --git a/ingest/bin/ingest_parallel.sh b/ingest/bin/ingest_parallel.sh
index f83bf0b..2f2bb5b 100755
--- a/ingest/bin/ingest_parallel.sh
+++ b/ingest/bin/ingest_parallel.sh
@@ -38,7 +38,7 @@ LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 #
 # Map/Reduce job
 #
-JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.2-SNAPSHOT.jar
+JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.3-SNAPSHOT.jar
 CONF=$SCRIPT_DIR/../conf/wikipedia.xml
 HDFS_DATA_DIR=$1
 export HADOOP_CLASSPATH=$CLASSPATH

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/ab20ab7b/ingest/pom.xml
----------------------------------------------------------------------
diff --git a/ingest/pom.xml b/ingest/pom.xml
index 0dd3aa5..e5124eb 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.2-SNAPSHOT</version>
+    <version>1.4.3-SNAPSHOT</version>
     <relativePath>../</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/ab20ab7b/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index c517589..eedf7b6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -20,7 +20,7 @@
   <parent>
         <artifactId>accumulo-examples</artifactId>
         <groupId>org.apache.accumulo</groupId>
-        <version>1.4.2-SNAPSHOT</version>
+        <version>1.4.3-SNAPSHOT</version>
         <relativePath>../</relativePath>
   </parent>
   <artifactId>accumulo-wikisearch</artifactId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/ab20ab7b/query-war/pom.xml
----------------------------------------------------------------------
diff --git a/query-war/pom.xml b/query-war/pom.xml
index 1e9fd2c..47133b4 100644
--- a/query-war/pom.xml
+++ b/query-war/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.2-SNAPSHOT</version>
+    <version>1.4.3-SNAPSHOT</version>
   </parent>
 
   <artifactId>wikisearch-query-war</artifactId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/ab20ab7b/query/pom.xml
----------------------------------------------------------------------
diff --git a/query/pom.xml b/query/pom.xml
index 488c897..d80d28c 100644
--- a/query/pom.xml
+++ b/query/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.2-SNAPSHOT</version>
+    <version>1.4.3-SNAPSHOT</version>
     <relativePath>../</relativePath>
   </parent>
 


[16/50] [abbrv] git commit: ACCUMULO-469 added license headers

Posted by uj...@apache.org.
ACCUMULO-469 added license headers

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1302533 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/66bb45c9
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/66bb45c9
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/66bb45c9

Branch: refs/heads/master
Commit: 66bb45c9b23bb4a91fa2c2aa8d12b320a5995016
Parents: e1dfeb6
Author: Billie Rinaldi <bi...@apache.org>
Authored: Mon Mar 19 16:34:43 2012 +0000
Committer: Billie Rinaldi <bi...@apache.org>
Committed: Mon Mar 19 16:34:43 2012 +0000

----------------------------------------------------------------------
 .../wikisearch/ingest/LRUOutputCombiner.java    | 24 ++++++++--
 .../output/BufferingRFileRecordWriter.java      | 50 ++++++++++++--------
 .../output/SortingRFileOutputFormat.java        | 20 +++++++-
 .../ingest/WikipediaInputSplitTest.java         | 39 ++++++++++-----
 4 files changed, 93 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/66bb45c9/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/LRUOutputCombiner.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/LRUOutputCombiner.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/LRUOutputCombiner.java
index e641f36..7d7b6dc 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/LRUOutputCombiner.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/LRUOutputCombiner.java
@@ -1,3 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.accumulo.examples.wikisearch.ingest;
 
 import java.util.LinkedHashMap;
@@ -6,14 +22,12 @@ import java.util.Map;
 public class LRUOutputCombiner<Key,Value> extends LinkedHashMap<Key,Value> {
   
   private static final long serialVersionUID = 1L;
-
-  public static abstract class Fold <Value>
-  {
+  
+  public static abstract class Fold<Value> {
     public abstract Value fold(Value oldValue, Value newValue);
   }
   
-  public static abstract class Output<Key,Value>
-  {
+  public static abstract class Output<Key,Value> {
     public abstract void output(Key key, Value value);
   }
   

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/66bb45c9/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/BufferingRFileRecordWriter.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/BufferingRFileRecordWriter.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/BufferingRFileRecordWriter.java
index 579bbe1..9b663de 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/BufferingRFileRecordWriter.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/BufferingRFileRecordWriter.java
@@ -1,10 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.accumulo.examples.wikisearch.output;
 
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
-import java.util.TreeMap;
 import java.util.Map.Entry;
+import java.util.TreeMap;
 
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.data.ColumnUpdate;
@@ -31,12 +47,10 @@ final class BufferingRFileRecordWriter extends RecordWriter<Text,Mutation> {
   
   private Map<Text,TreeMap<Key,Value>> buffers = new HashMap<Text,TreeMap<Key,Value>>();
   private Map<Text,Long> bufferSizes = new HashMap<Text,Long>();
-
-  private TreeMap<Key,Value> getBuffer(Text tablename)
-  {
+  
+  private TreeMap<Key,Value> getBuffer(Text tablename) {
     TreeMap<Key,Value> buffer = buffers.get(tablename);
-    if(buffer == null)
-    {
+    if (buffer == null) {
       buffer = new TreeMap<Key,Value>();
       buffers.put(tablename, buffer);
       bufferSizes.put(tablename, 0l);
@@ -44,14 +58,11 @@ final class BufferingRFileRecordWriter extends RecordWriter<Text,Mutation> {
     return buffer;
   }
   
-  private Text getLargestTablename()
-  {
+  private Text getLargestTablename() {
     long max = 0;
     Text table = null;
-    for(Entry<Text,Long> e:bufferSizes.entrySet())
-    {
-      if(e.getValue() > max)
-      {
+    for (Entry<Text,Long> e : bufferSizes.entrySet()) {
+      if (e.getValue() > max) {
         max = e.getValue();
         table = e.getKey();
       }
@@ -59,10 +70,9 @@ final class BufferingRFileRecordWriter extends RecordWriter<Text,Mutation> {
     return table;
   }
   
-  private void flushLargestTable() throws IOException
-  {
+  private void flushLargestTable() throws IOException {
     Text tablename = getLargestTablename();
-    if(tablename == null)
+    if (tablename == null)
       return;
     long bufferSize = bufferSizes.get(tablename);
     TreeMap<Key,Value> buffer = buffers.get(tablename);
@@ -98,7 +108,7 @@ final class BufferingRFileRecordWriter extends RecordWriter<Text,Mutation> {
   
   @Override
   public void close(TaskAttemptContext arg0) throws IOException, InterruptedException {
-    while(size > 0)
+    while (size > 0)
       flushLargestTable();
   }
   
@@ -106,9 +116,9 @@ final class BufferingRFileRecordWriter extends RecordWriter<Text,Mutation> {
   public void write(Text table, Mutation mutation) throws IOException, InterruptedException {
     TreeMap<Key,Value> buffer = getBuffer(table);
     int mutationSize = 0;
-    for(ColumnUpdate update: mutation.getUpdates())
-    {
-      Key k = new Key(mutation.getRow(),update.getColumnFamily(),update.getColumnQualifier(),update.getColumnVisibility(),update.getTimestamp(),update.isDeleted());
+    for (ColumnUpdate update : mutation.getUpdates()) {
+      Key k = new Key(mutation.getRow(), update.getColumnFamily(), update.getColumnQualifier(), update.getColumnVisibility(), update.getTimestamp(),
+          update.isDeleted());
       Value v = new Value(update.getValue());
       // TODO account for object overhead
       mutationSize += k.getSize();
@@ -121,7 +131,7 @@ final class BufferingRFileRecordWriter extends RecordWriter<Text,Mutation> {
     // TODO use a MutableLong instead
     bufferSize += mutationSize;
     bufferSizes.put(table, bufferSize);
-
+    
     while (size >= maxSize) {
       flushLargestTable();
     }

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/66bb45c9/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java
index 2738e2c..1fa8fdc 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java
@@ -1,3 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.accumulo.examples.wikisearch.output;
 
 import java.io.IOException;
@@ -14,9 +30,9 @@ import org.apache.hadoop.mapreduce.RecordWriter;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 
 public class SortingRFileOutputFormat extends OutputFormat<Text,Mutation> {
-
+  
   // private static final Logger log = Logger.getLogger(SortingRFileOutputFormat.class);
-
+  
   public static final String PATH_NAME = "sortingrfileoutputformat.path";
   public static final String MAX_BUFFER_SIZE = "sortingrfileoutputformat.max.buffer.size";
   

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/66bb45c9/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java
----------------------------------------------------------------------
diff --git a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java
index 3da5a62..f6b2791 100644
--- a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java
+++ b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java
@@ -1,9 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.accumulo.examples.wikisearch.ingest;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.DataInput;
-import java.io.DataOutput;
 import java.io.IOException;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
@@ -17,20 +32,19 @@ import org.junit.Test;
 
 public class WikipediaInputSplitTest {
   @Test
-  public void testSerialization() throws IOException
-  {
+  public void testSerialization() throws IOException {
     Path testPath = new Path("/foo/bar");
-    String [] hosts = new String [2];
+    String[] hosts = new String[2];
     hosts[0] = "abcd";
     hosts[1] = "efgh";
-    FileSplit fSplit = new FileSplit(testPath,1,2,hosts);
-    WikipediaInputSplit split = new WikipediaInputSplit(fSplit,7);
+    FileSplit fSplit = new FileSplit(testPath, 1, 2, hosts);
+    WikipediaInputSplit split = new WikipediaInputSplit(fSplit, 7);
     ByteArrayOutputStream baos = new ByteArrayOutputStream();
     ObjectOutputStream out = new ObjectOutputStream(baos);
     split.write(out);
-    out.close();    
+    out.close();
     baos.close();
-
+    
     ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
     DataInput in = new ObjectInputStream(bais);
     
@@ -45,12 +59,11 @@ public class WikipediaInputSplitTest {
     Assert.assertTrue(fSplit.getPath().equals(fSplit2.getPath()));
     Assert.assertTrue(fSplit.getStart() == fSplit2.getStart());
     Assert.assertTrue(fSplit.getLength() == fSplit2.getLength());
-
-    String [] hosts2 = fSplit2.getLocations();
+    
+    String[] hosts2 = fSplit2.getLocations();
     Assert.assertEquals(hosts.length, hosts2.length);
-    for(int i = 0; i < hosts.length; i++)
-    {
-      Assert.assertEquals(hosts[i],hosts2[i]);
+    for (int i = 0; i < hosts.length; i++) {
+      Assert.assertEquals(hosts[i], hosts2[i]);
     }
   }
 }


[18/50] [abbrv] git commit: ACCUMULO-474: memoize ColumnVisibility objects to decrease memory usage

Posted by uj...@apache.org.
ACCUMULO-474: memoize ColumnVisibility objects to decrease memory usage

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1302914 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/53337735
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/53337735
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/53337735

Branch: refs/heads/master
Commit: 533377356b3f2999c7e15d7e948c487d22463186
Parents: 2c1666f
Author: Eric C. Newton <ec...@apache.org>
Authored: Tue Mar 20 14:32:50 2012 +0000
Committer: Eric C. Newton <ec...@apache.org>
Committed: Tue Mar 20 14:32:50 2012 +0000

----------------------------------------------------------------------
 .../wikisearch/iterator/EvaluatingIterator.java    | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/53337735/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/EvaluatingIterator.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/EvaluatingIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/EvaluatingIterator.java
index 8dde40f..d51023c 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/EvaluatingIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/EvaluatingIterator.java
@@ -30,12 +30,14 @@ import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.security.ColumnVisibility;
 import org.apache.accumulo.examples.wikisearch.parser.EventFields;
 import org.apache.accumulo.examples.wikisearch.parser.EventFields.FieldValue;
+import org.apache.commons.collections.map.LRUMap;
 import org.apache.hadoop.io.Text;
 
 
 public class EvaluatingIterator extends AbstractEvaluatingIterator {
   
   public static final String NULL_BYTE_STRING = "\u0000";
+  LRUMap visibilityMap = new LRUMap();
   
   public EvaluatingIterator() {
     super();
@@ -78,7 +80,20 @@ public class EvaluatingIterator extends AbstractEvaluatingIterator {
     String fieldName = colq.substring(0, idx);
     String fieldValue = colq.substring(idx + 1);
     
-    event.put(fieldName, new FieldValue(new ColumnVisibility(key.getColumnVisibility().getBytes()), fieldValue.getBytes()));
+    event.put(fieldName, new FieldValue(getColumnVisibility(key), fieldValue.getBytes()));
+  }
+
+  /**
+   * @param key
+   * @return
+   */
+  public ColumnVisibility getColumnVisibility(Key key) {
+    ColumnVisibility result = (ColumnVisibility) visibilityMap.get(key.getColumnVisibility());
+    if (result != null) 
+      return result;
+    result = new ColumnVisibility(key.getColumnVisibility().getBytes());
+    visibilityMap.put(key.getColumnVisibility(), result);
+    return result;
   }
   
   /**
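
One note on the memoization above: the no-argument LRUMap() constructor in commons-collections 3.x defaults to a maximum of 100 entries. If a table carries many distinct visibility expressions, the cache can be bounded explicitly; a minimal sketch, with an illustrative capacity:

    // Sketch: size the visibility cache explicitly; entries past the bound
    // are evicted in least-recently-used order.
    LRUMap visibilityMap = new LRUMap(1000);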


[45/50] [abbrv] git commit: ACCUMULO-2019 Fix packaging and deployment instructions for wikisearch query interface.

Posted by uj...@apache.org.
ACCUMULO-2019 Fix packaging and deployment instructions for wikisearch query interface.

* Update directions for the project's move to contrib.
* Update the $ACCUMULO_HOME/lib/ext requirements based on what ships with the current Accumulo installation.
* Update the dependencies included in the query dist tarball for the Hadoop 2 profile.


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/1e4a40bc
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/1e4a40bc
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/1e4a40bc

Branch: refs/heads/master
Commit: 1e4a40bcc6cd11100def006f26037dbe0cd14e71
Parents: bcf16ab
Author: Sean Busbey <bu...@clouderagovt.com>
Authored: Fri Dec 13 14:30:59 2013 -0600
Committer: Sean Busbey <bu...@cloudera.com>
Committed: Mon Mar 10 10:16:15 2014 -0500

----------------------------------------------------------------------
 README        |  6 ++----
 query/pom.xml | 23 +++++++++++++++++++++++
 2 files changed, 25 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1e4a40bc/README
----------------------------------------------------------------------
diff --git a/README b/README
index 869f5e7..ad28cdc 100644
--- a/README
+++ b/README
@@ -38,11 +38,11 @@
 	1. Copy the query/src/main/resources/META-INF/ejb-jar.xml.example file to 
 	   query/src/main/resources/META-INF/ejb-jar.xml. Modify to the file to contain the same 
 	   information that you put into the wikipedia.xml file from the Ingest step above. 
-	2. Re-build the query distribution by running 'mvn package assembly:single' in the top-level directory. 
+	2. Re-build the query distribution by running 'mvn package assembly:single' in the query module's directory.
         3. Untar the resulting file in the $JBOSS_HOME/server/default directory.
 
               $ cd $JBOSS_HOME/server/default
-              $ tar -xzf $ACCUMULO_HOME/src/examples/wikisearch/query/target/wikisearch-query*.tar.gz
+              $ tar -xzf /some/path/to/wikisearch/query/target/wikisearch-query*.tar.gz
  
            This will place the dependent jars in the lib directory and the EJB jar into the deploy directory.
 	4. Next, copy the wikisearch*.war file in the query-war/target directory to $JBOSS_HOME/server/default/deploy. 
@@ -51,11 +51,9 @@
 			setauths -u <user> -s all,enwiki,eswiki,frwiki,fawiki
 	7. Copy the following jars to the $ACCUMULO_HOME/lib/ext directory from the $JBOSS_HOME/server/default/lib directory:
 	
-		commons-lang*.jar
 		kryo*.jar
 		minlog*.jar
 		commons-jexl*.jar
-		guava*.jar
 		
 	8. Copy the $JBOSS_HOME/server/default/deploy/wikisearch-query*.jar to $ACCUMULO_HOME/lib/ext.
 

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/1e4a40bc/query/pom.xml
----------------------------------------------------------------------
diff --git a/query/pom.xml b/query/pom.xml
index d2f75ea..eb932f9 100644
--- a/query/pom.xml
+++ b/query/pom.xml
@@ -175,6 +175,29 @@
           <artifactId>hadoop-client</artifactId>
         </dependency>
       </dependencies>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-dependency-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>copy-dependencies</id>
+                <phase>process-resources</phase>
+                <goals>
+                  <goal>copy-dependencies</goal>
+                </goals>
+                <configuration>
+                  <outputDirectory>lib</outputDirectory>
+                  <!-- just grab the non-provided runtime dependencies -->
+                  <includeArtifactIds>commons-lang,commons-codec,protobuf-java,libthrift,zookeeper,hadoop-client,hadoop-common,hadoop-hdfs,commons-jexl,guava,kryo,asm,minlog,reflectasm,wikisearch-ingest,accumulo-core,cloudtrace</includeArtifactIds>
+                  <excludeTransitive>false</excludeTransitive>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
     </profile>
   </profiles>
 


[08/50] [abbrv] git commit: ACCUMULO-375

Posted by uj...@apache.org.
ACCUMULO-375

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1243961 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/0e1e67db
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/0e1e67db
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/0e1e67db

Branch: refs/heads/master
Commit: 0e1e67dba93a200297da959823526d2264a85eab
Parents: b4f3087
Author: Adam Fuchs <af...@apache.org>
Authored: Tue Feb 14 14:46:37 2012 +0000
Committer: Adam Fuchs <af...@apache.org>
Committed: Tue Feb 14 14:46:37 2012 +0000

----------------------------------------------------------------------
 .../ingest/WikipediaPartitionedIngester.java    | 30 ++++++++++++++------
 .../output/SortingRFileOutputFormat.java        |  6 +++-
 2 files changed, 26 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0e1e67db/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
index ca9af6a..bcdee43 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
@@ -39,6 +39,7 @@ import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
 import org.apache.accumulo.core.iterators.user.SummingCombiner;
+import org.apache.accumulo.core.tabletserver.thrift.MutationLogger.log_args;
 import org.apache.accumulo.examples.wikisearch.ingest.ArticleExtractor.Article;
 import org.apache.accumulo.examples.wikisearch.iterator.GlobalIndexUidCombiner;
 import org.apache.accumulo.examples.wikisearch.iterator.TextIndexCombiner;
@@ -58,9 +59,12 @@ import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
+import org.apache.log4j.Logger;
 
 public class WikipediaPartitionedIngester extends Configured implements Tool {
-  
+
+  private static final Logger log = Logger.getLogger(WikipediaPartitionedIngester.class);
+
   public final static String INGEST_LANGUAGE = "wikipedia.ingest_language";
   public final static String SPLIT_FILE = "wikipedia.split_file";
   public final static String TABLE_NAME = "wikipedia.table";
@@ -150,7 +154,7 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
     return 0;
   }
   
-  public int runPartitionerJob() throws Exception
+  private int runPartitionerJob() throws Exception
   {
     Job partitionerJob = new Job(getConf(), "Partition Wikipedia");
     Configuration partitionerConf = partitionerJob.getConfiguration();
@@ -191,7 +195,7 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
     return partitionerJob.waitForCompletion(true) ? 0 : 1;
   }
   
-  public int runIngestJob() throws Exception
+  private int runIngestJob() throws Exception
   {
     Job ingestJob = new Job(getConf(), "Ingest Partitioned Wikipedia");
     Configuration ingestConf = ingestJob.getConfiguration();
@@ -221,6 +225,16 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
     
     if(WikipediaConfiguration.bulkIngest(ingestConf))
     {
+      ingestJob.setOutputFormatClass(SortingRFileOutputFormat.class);
+      SortingRFileOutputFormat.setMaxBufferSize(ingestConf, WikipediaConfiguration.bulkIngestBufferSize(ingestConf));
+      String bulkIngestDir = WikipediaConfiguration.bulkIngestDir(ingestConf);
+      if(bulkIngestDir == null)
+      {
+        log.error("Bulk ingest dir not set");
+        return 1;
+      }
+      SortingRFileOutputFormat.setPathName(ingestConf, WikipediaConfiguration.bulkIngestDir(ingestConf));
+    } else {
       ingestJob.setOutputFormatClass(AccumuloOutputFormat.class);
       String zookeepers = WikipediaConfiguration.getZookeepers(ingestConf);
       String instanceName = WikipediaConfiguration.getInstanceName(ingestConf);
@@ -228,16 +242,12 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
       byte[] password = WikipediaConfiguration.getPassword(ingestConf);
       AccumuloOutputFormat.setOutputInfo(ingestJob.getConfiguration(), user, password, true, tablename);
       AccumuloOutputFormat.setZooKeeperInstance(ingestJob.getConfiguration(), instanceName, zookeepers);
-    } else {
-      ingestJob.setOutputFormatClass(SortingRFileOutputFormat.class);
-      SortingRFileOutputFormat.setMaxBufferSize(ingestConf, WikipediaConfiguration.bulkIngestBufferSize(ingestConf));
-      SortingRFileOutputFormat.setPathName(ingestConf, WikipediaConfiguration.bulkIngestDir(ingestConf));
     }
     
     return ingestJob.waitForCompletion(true) ? 0 : 1;
   }
   
-  public int loadBulkFiles() throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException
+  private int loadBulkFiles() throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException
   {
     Configuration conf = getConf();
 
@@ -253,7 +263,9 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
       if(status.isDir() == false)
         continue;
       Path dir = status.getPath();
-      connector.tableOperations().importDirectory(dir.getName(), dir.toString(), failureDirectory+"/"+dir.getName(), true);
+      Path failPath = new Path(failureDirectory+"/"+dir.getName());
+      fs.mkdirs(failPath);
+      connector.tableOperations().importDirectory(dir.getName(), dir.toString(), failPath.toString(), true);
     }
     
     return 0;
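
For context on the loadBulkFiles change above: the job now creates a per-directory failure path with fs.mkdirs before calling importDirectory, since the bulk import expects a usable failure directory for each import. A standalone sketch of the same pattern — the paths and table name are placeholders, not values from this commit:

    // Sketch only: illustrative paths and table name.
    FileSystem fs = FileSystem.get(conf);
    Path bulkDir = new Path("/wikisearch/bulk/enwiki");
    Path failDir = new Path("/wikisearch/failures/enwiki");
    fs.mkdirs(failDir); // importDirectory needs the failure directory in place
    connector.tableOperations().importDirectory("enwiki", bulkDir.toString(), failDir.toString(), true);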

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0e1e67db/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java
index f556287..d8c57c2 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java
@@ -4,6 +4,7 @@ import java.io.IOException;
 
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.OutputCommitter;
@@ -12,9 +13,12 @@ import org.apache.hadoop.mapreduce.RecordWriter;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.io.Text;
+import org.apache.log4j.Logger;
 
 public class SortingRFileOutputFormat extends OutputFormat<Text,Mutation> {
-  
+
+  private static final Logger log = Logger.getLogger(SortingRFileOutputFormat.class);
+
   public static final String PATH_NAME = "sortingrfileoutputformat.path";
   public static final String MAX_BUFFER_SIZE = "sortingrfileoutputformat.max.buffer.size";
   


[33/50] [abbrv] git commit: ACCUMULO-1546 fixing wikisearch pom to make maven release plugin happy

Posted by uj...@apache.org.
ACCUMULO-1546 fixing wikisearch pom to make maven release plugin happy


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/fb1d6c58
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/fb1d6c58
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/fb1d6c58

Branch: refs/heads/master
Commit: fb1d6c5866f9ccfdb9d4d19b1b4dfe57a462409d
Parents: a208a7c
Author: Mike Drob <md...@mdrob.com>
Authored: Thu Aug 15 01:34:24 2013 -0400
Committer: Mike Drob <md...@mdrob.com>
Committed: Thu Aug 15 01:37:21 2013 -0400

----------------------------------------------------------------------
 pom.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/fb1d6c58/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 1036436..0598c41 100644
--- a/pom.xml
+++ b/pom.xml
@@ -185,7 +185,7 @@
       <dependency>
         <groupId>org.apache.accumulo</groupId>
         <artifactId>accumulo-core</artifactId>
-        <version>${version.accumulo}</version>
+        <version>${project.version}</version>
       </dependency>
       <dependency>
         <groupId>org.apache.hadoop</groupId>
@@ -200,7 +200,7 @@
       <dependency>
         <groupId>org.apache.accumulo</groupId>
         <artifactId>cloudtrace</artifactId>
-        <version>${version.accumulo}</version>
+        <version>${project.version}</version>
       </dependency>
       <dependency>
         <groupId>com.google.collections</groupId>


[04/50] [abbrv] git commit: ACCUMULO-375 added an LRUOutputCombiner to combine and reduce the number of mutations sent to Accumulo

Posted by uj...@apache.org.
ACCUMULO-375 added an LRUOutputCombiner to combine and reduce the number of mutations sent to Accumulo

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1242117 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/842696ee
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/842696ee
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/842696ee

Branch: refs/heads/master
Commit: 842696eebe9a10dc63731c99160d1e1ef23e7787
Parents: fa35931
Author: Adam Fuchs <af...@apache.org>
Authored: Wed Feb 8 21:43:35 2012 +0000
Committer: Adam Fuchs <af...@apache.org>
Committed: Wed Feb 8 21:43:35 2012 +0000

----------------------------------------------------------------------
 .../wikisearch/ingest/LRUOutputCombiner.java    |  61 +++++
 .../wikisearch/ingest/WikipediaInputFormat.java |   2 +
 .../wikisearch/ingest/WikipediaMapper.java      |   2 +-
 .../ingest/WikipediaPartitionedMapper.java      | 229 +++++++++++++------
 4 files changed, 229 insertions(+), 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/842696ee/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/LRUOutputCombiner.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/LRUOutputCombiner.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/LRUOutputCombiner.java
new file mode 100644
index 0000000..e641f36
--- /dev/null
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/LRUOutputCombiner.java
@@ -0,0 +1,61 @@
+package org.apache.accumulo.examples.wikisearch.ingest;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+public class LRUOutputCombiner<Key,Value> extends LinkedHashMap<Key,Value> {
+  
+  private static final long serialVersionUID = 1L;
+
+  public static abstract class Fold <Value>
+  {
+    public abstract Value fold(Value oldValue, Value newValue);
+  }
+  
+  public static abstract class Output<Key,Value>
+  {
+    public abstract void output(Key key, Value value);
+  }
+  
+  private final int capacity;
+  private final Fold<Value> fold;
+  private final Output<Key,Value> output;
+  
+  private long cacheHits = 0;
+  private long cacheMisses = 0;
+  
+  public LRUOutputCombiner(int capacity, Fold<Value> fold, Output<Key,Value> output) {
+    super(capacity + 1, 1.1f, true);
+    this.capacity = capacity;
+    this.fold = fold;
+    this.output = output;
+  }
+  
+  protected boolean removeEldestEntry(Map.Entry<Key,Value> eldest) {
+    if (size() > capacity) {
+      output.output(eldest.getKey(), eldest.getValue());
+      return true;
+    }
+    return false;
+  }
+  
+  @Override
+  public Value put(Key key, Value value) {
+    Value val = get(key);
+    if (val != null) {
+      value = fold.fold(val, value);
+      cacheHits++;
+    } else {
+      cacheMisses++;
+    }
+    super.put(key, value);
+    return null;
+  }
+  
+  public void flush() {
+    for (Map.Entry<Key,Value> e : entrySet()) {
+      output.output(e.getKey(), e.getValue());
+    }
+    clear();
+  }
+}
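
As an aside for readers of this diff, here is a minimal, self-contained sketch (not part of the commit) of how the combiner behaves. The String/Integer types, the tiny capacity of 2, and the println output are hypothetical stand-ins for the mapper's MutationInfo keys and CountAndSet values:

  import org.apache.accumulo.examples.wikisearch.ingest.LRUOutputCombiner;

  public class LRUOutputCombinerDemo {
    public static void main(String[] args) {
      // fold merges the incoming value with the value already cached for the same key
      LRUOutputCombiner.Fold<Integer> sum = new LRUOutputCombiner.Fold<Integer>() {
        @Override
        public Integer fold(Integer oldValue, Integer newValue) {
          return oldValue + newValue;
        }
      };
      // output receives entries evicted from the cache, and the remainder on flush()
      LRUOutputCombiner.Output<String,Integer> out = new LRUOutputCombiner.Output<String,Integer>() {
        @Override
        public void output(String key, Integer value) {
          System.out.println(key + " -> " + value);
        }
      };
      LRUOutputCombiner<String,Integer> combiner =
          new LRUOutputCombiner<String,Integer>(2, sum, out);
      combiner.put("a", 1);
      combiner.put("b", 1);
      combiner.put("a", 1); // folded into the cached "a"; nothing emitted yet
      combiner.put("c", 1); // over capacity: least-recently-used "b" is emitted
      combiner.flush();     // emits the rest: "a" -> 2 and "c" -> 1
    }
  }

Because eviction is least-recently-used, hot keys (common index terms) keep folding in memory while cold keys stream out, which is what cuts the mutation volume.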

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/842696ee/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java
index 731d02c..e682f2f 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java
@@ -67,6 +67,8 @@ public class WikipediaInputFormat extends TextInputFormat {
 
     @Override
     public String[] getLocations() throws IOException, InterruptedException {
+      // for highly replicated files, returning all of the locations can lead to bunching
+      // TODO replace this with a subset of the locations
       return fileSplit.getLocations();
     }
 

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/842696ee/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
index 1ec531b..a06c57f 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
@@ -205,7 +205,7 @@ public class WikipediaMapper extends Mapper<LongWritable,Text,Text,Mutation> {
    * @return
    * @throws IOException
    */
-  private Set<String> getTokens(Article article) throws IOException {
+  static Set<String> getTokens(Article article) throws IOException {
     Set<String> tokenList = new HashSet<String>();
     WikipediaTokenizer tok = new WikipediaTokenizer(new StringReader(article.getText()));
     TermAttribute term = tok.addAttribute(TermAttribute.class);

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/842696ee/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java
index 4d94c24..25bf572 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java
@@ -21,7 +21,6 @@ package org.apache.accumulo.examples.wikisearch.ingest;
 
 
 import java.io.IOException;
-import java.io.StringReader;
 import java.nio.charset.Charset;
 import java.util.HashSet;
 import java.util.Map.Entry;
@@ -31,17 +30,15 @@ import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.security.ColumnVisibility;
 import org.apache.accumulo.examples.wikisearch.ingest.ArticleExtractor.Article;
+import org.apache.accumulo.examples.wikisearch.iterator.GlobalIndexUidCombiner;
 import org.apache.accumulo.examples.wikisearch.normalizer.LcNoDiacriticsNormalizer;
 import org.apache.accumulo.examples.wikisearch.protobuf.Uid;
-import org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.Builder;
 import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.log4j.Logger;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.lucene.wikipedia.analysis.WikipediaTokenizer;
 
 import com.google.common.collect.HashMultimap;
 import com.google.common.collect.Multimap;
@@ -66,17 +63,171 @@ public class WikipediaPartitionedMapper extends Mapper<Text,Article,Text,Mutatio
   private Text reverseIndexTableName = null;
   private Text metadataTableName = null;
   
+  private static class MutationInfo {
+    final String row;
+    final String colfam;
+    final String colqual;
+    final ColumnVisibility cv;
+    final long timestamp;
+    
+    public MutationInfo(String row, String colfam, String colqual, ColumnVisibility cv, long timestamp) {
+      super();
+      this.row = row;
+      this.colfam = colfam;
+      this.colqual = colqual;
+      this.cv = cv;
+      this.timestamp = timestamp;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      MutationInfo other = (MutationInfo)obj;
+      return (row == other.row || row.equals(other.row)) &&
+          (colfam == other.colfam || colfam.equals(other.colfam)) &&
+          colqual.equals(other.colqual) &&
+          (cv == other.cv || cv.equals(other.cv)) &&
+          timestamp == other.timestamp;
+    }
+
+    @Override
+    public int hashCode() {
+      return row.hashCode() ^ colfam.hashCode() ^ colqual.hashCode() ^ cv.hashCode() ^ (int)timestamp;
+    }
+  }
+  
+  private LRUOutputCombiner<MutationInfo,CountAndSet> wikiIndexOutput;
+  private LRUOutputCombiner<MutationInfo,CountAndSet> wikiReverseIndexOutput;
+  private LRUOutputCombiner<MutationInfo,Value> wikiMetadataOutput;
+  
+  private static class CountAndSet
+  {
+    public int count;
+    public HashSet<String> set;
+    
+    public CountAndSet(String entry)
+    {
+      set = new HashSet<String>();
+      set.add(entry);
+      count = 1;
+    }
+  }
+  
+
   @Override
-  public void setup(Context context) {
+  public void setup(final Context context) {
     Configuration conf = context.getConfiguration();
     tablename = new Text(WikipediaConfiguration.getTableName(conf));
     indexTableName = new Text(tablename + "Index");
     reverseIndexTableName = new Text(tablename + "ReverseIndex");
     metadataTableName = new Text(tablename + "Metadata");
     
+    final Text metadataTableNameFinal = metadataTableName;
+    final Text indexTableNameFinal = indexTableName;
+    final Text reverseIndexTableNameFinal = reverseIndexTableName;
+    
     numPartitions = WikipediaConfiguration.getNumPartitions(conf);
+
+    LRUOutputCombiner.Fold<CountAndSet> indexFold = 
+        new LRUOutputCombiner.Fold<CountAndSet>() {
+      @Override
+      public CountAndSet fold(CountAndSet oldValue, CountAndSet newValue) {
+        oldValue.count += newValue.count;
+        if(oldValue.set == null || newValue.set == null)
+        {
+          oldValue.set = null;
+          return oldValue;
+        }
+        oldValue.set.addAll(newValue.set);
+        if(oldValue.set.size() > GlobalIndexUidCombiner.MAX)
+          oldValue.set = null;
+        return oldValue;
+      }
+    };
+    LRUOutputCombiner.Output<MutationInfo,CountAndSet> indexOutput =
+        new LRUOutputCombiner.Output<WikipediaPartitionedMapper.MutationInfo,CountAndSet>() {
+      
+      @Override
+      public void output(MutationInfo key, CountAndSet value)
+      {
+          Uid.List.Builder builder = Uid.List.newBuilder();
+          builder.setCOUNT(value.count);
+          if (value.set == null) {
+            builder.setIGNORE(true);
+            builder.clearUID();
+          } else {
+            builder.setIGNORE(false);
+            builder.addAllUID(value.set);
+          }
+          Uid.List list = builder.build();
+          Value val = new Value(list.toByteArray());
+          Mutation m = new Mutation(key.row);
+          m.put(key.colfam, key.colqual, key.cv, key.timestamp, val);
+          try {
+            context.write(indexTableNameFinal, m);
+          } catch (Exception e) {
+            throw new RuntimeException(e);
+          }
+      }
+    };
+    LRUOutputCombiner.Output<MutationInfo,CountAndSet> reverseIndexOutput =
+        new LRUOutputCombiner.Output<WikipediaPartitionedMapper.MutationInfo,CountAndSet>() {
+      
+      @Override
+      public void output(MutationInfo key, CountAndSet value)
+      {
+          Uid.List.Builder builder = Uid.List.newBuilder();
+          builder.setCOUNT(value.count);
+          if (value.set == null) {
+            builder.setIGNORE(true);
+            builder.clearUID();
+          } else {
+            builder.setIGNORE(false);
+            builder.addAllUID(value.set);
+          }
+          Uid.List list = builder.build();
+          Value val = new Value(list.toByteArray());
+          Mutation m = new Mutation(key.row);
+          m.put(key.colfam, key.colqual, key.cv, key.timestamp, val);
+          try {
+            context.write(reverseIndexTableNameFinal, m);
+          } catch (Exception e) {
+            throw new RuntimeException(e);
+          }
+      }
+    };
+      
+    wikiIndexOutput = new LRUOutputCombiner<WikipediaPartitionedMapper.MutationInfo,CountAndSet>(10000,indexFold,indexOutput);
+    wikiReverseIndexOutput = new LRUOutputCombiner<WikipediaPartitionedMapper.MutationInfo,CountAndSet>(10000, indexFold,reverseIndexOutput);
+    wikiMetadataOutput = new LRUOutputCombiner<WikipediaPartitionedMapper.MutationInfo,Value>(10000,
+        new LRUOutputCombiner.Fold<Value>() {
+          @Override
+          public Value fold(Value oldValue, Value newValue) {
+            return oldValue;
+          }},
+        new LRUOutputCombiner.Output<MutationInfo,Value>() {
+          @Override
+          public void output(MutationInfo key, Value value) {
+            Mutation m = new Mutation(key.row);
+            m.put(key.colfam, key.colqual, key.cv, key.timestamp, value);
+            try {
+              context.write(metadataTableNameFinal, m);
+            } catch (Exception e) {
+              throw new RuntimeException(e);
+            }
+          }});
   }
   
+  
+  
+  @Override
+  protected void cleanup(Context context) throws IOException, InterruptedException {
+    wikiIndexOutput.flush();
+    wikiMetadataOutput.flush();
+    wikiReverseIndexOutput.flush();
+  }
+
+
+
   @Override
   protected void map(Text language, Article article, Context context) throws IOException, InterruptedException {
     String NULL_BYTE = "\u0000";
@@ -93,13 +244,12 @@ public class WikipediaPartitionedMapper extends Mapper<Text,Article,Text,Mutatio
       for (Entry<String,Object> entry : article.getFieldValues().entrySet()) {
         m.put(colfPrefix + article.getId(), entry.getKey() + NULL_BYTE + entry.getValue().toString(), cv, article.getTimestamp(), NULL_VALUE);
         // Create mutations for the metadata table.
-        Mutation mm = new Mutation(entry.getKey());
-        mm.put(METADATA_EVENT_COLUMN_FAMILY, language.toString(), cv, article.getTimestamp(), NULL_VALUE);
-        context.write(metadataTableName, mm);
+        MutationInfo mm = new MutationInfo(entry.getKey(), METADATA_EVENT_COLUMN_FAMILY, language.toString(), cv, article.getTimestamp());
+        wikiMetadataOutput.put(mm, NULL_VALUE);
       }
       
       // Tokenize the content
-      Set<String> tokens = getTokens(article);
+      Set<String> tokens = WikipediaMapper.getTokens(article);
       
       // We are going to put the fields to be indexed into a multimap. This allows us to iterate
       // over the entire set once.
@@ -118,30 +268,17 @@ public class WikipediaPartitionedMapper extends Mapper<Text,Article,Text,Mutatio
         m.put(indexPrefix + index.getKey(), index.getValue() + NULL_BYTE + colfPrefix + article.getId(), cv, article.getTimestamp(), NULL_VALUE);
         
         // Create mutations for the global index
-        // Create a UID object for the Value
-        Builder uidBuilder = Uid.List.newBuilder();
-        uidBuilder.setIGNORE(false);
-        uidBuilder.setCOUNT(1);
-        uidBuilder.addUID(Integer.toString(article.getId()));
-        Uid.List uidList = uidBuilder.build();
-        Value val = new Value(uidList.toByteArray());
-        
-        // Create mutations for the global index
         // Row is field value, colf is field name, colq is partitionid\0language, value is Uid.List object
-        Mutation gm = new Mutation(index.getValue());
-        gm.put(index.getKey(), partitionId + NULL_BYTE + language, cv, article.getTimestamp(), val);
-        context.write(indexTableName, gm);
+        MutationInfo gm = new MutationInfo(index.getValue(),index.getKey(),partitionId + NULL_BYTE + language, cv, article.getTimestamp());
+        wikiIndexOutput.put(gm, new CountAndSet(Integer.toString(article.getId())));
         
         // Create mutations for the global reverse index
-        Mutation grm = new Mutation(StringUtils.reverse(index.getValue()));
-        grm.put(index.getKey(), partitionId + NULL_BYTE + language, cv, article.getTimestamp(), val);
-        context.write(reverseIndexTableName, grm);
+        MutationInfo grm = new MutationInfo(StringUtils.reverse(index.getValue()),index.getKey(),partitionId + NULL_BYTE + language, cv, article.getTimestamp());
+        wikiReverseIndexOutput.put(grm, new CountAndSet(Integer.toString(article.getId())));
         
         // Create mutations for the metadata table.
-        Mutation mm = new Mutation(index.getKey());
-        mm.put(METADATA_INDEX_COLUMN_FAMILY, language + NULL_BYTE + LcNoDiacriticsNormalizer.class.getName(), cv, article.getTimestamp(), NULL_VALUE);
-        context.write(metadataTableName, mm);
-        
+        MutationInfo mm = new MutationInfo(index.getKey(),METADATA_INDEX_COLUMN_FAMILY, language + NULL_BYTE + LcNoDiacriticsNormalizer.class.getName(), cv, article.getTimestamp());
+        wikiMetadataOutput.put(mm, NULL_VALUE);
       }
       // Add the entire text to the document section of the table.
       // row is the partition, colf is 'd', colq is language\0articleid, value is Base64 encoded GZIP'd document
@@ -153,40 +290,4 @@ public class WikipediaPartitionedMapper extends Mapper<Text,Article,Text,Mutatio
     }
     context.progress();
   }
-  
-  /**
-   * Tokenize the wikipedia content
-   * 
-   * @param article
-   * @return
-   * @throws IOException
-   */
-  private Set<String> getTokens(Article article) throws IOException {
-    Set<String> tokenList = new HashSet<String>();
-    WikipediaTokenizer tok = new WikipediaTokenizer(new StringReader(article.getText()));
-    TermAttribute term = tok.addAttribute(TermAttribute.class);
-    try {
-      while (tok.incrementToken()) {
-        String token = term.term();
-        if (!StringUtils.isEmpty(token))
-          tokenList.add(token);
-      }
-    } catch (IOException e) {
-      log.error("Error tokenizing text", e);
-    } finally {
-      try {
-        tok.end();
-      } catch (IOException e) {
-        log.error("Error calling end()", e);
-      } finally {
-        try {
-          tok.close();
-        } catch (IOException e) {
-          log.error("Error closing tokenizer", e);
-        }
-      }
-    }
-    return tokenList;
-  }
-  
 }


[46/50] [abbrv] git commit: ACCUMULO-1882 wikisearch-ingest module fails to build.

Posted by uj...@apache.org.
ACCUMULO-1882 wikisearch-ingest module fails to build.

Work around MDEP-187 by using prepare-package instead of process-resources to copy dependencies to the lib/ directory.


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/49f553a0
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/49f553a0
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/49f553a0

Branch: refs/heads/master
Commit: 49f553a03a57ae53c8aa4308cbd36bcd8845b892
Parents: 1e4a40b
Author: Sean Busbey <bu...@clouderagovt.com>
Authored: Tue Dec 17 10:12:51 2013 -0500
Committer: Sean Busbey <bu...@cloudera.com>
Committed: Mon Mar 10 11:04:13 2014 -0500

----------------------------------------------------------------------
 ingest/pom.xml | 2 +-
 pom.xml        | 2 +-
 query/pom.xml  | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/49f553a0/ingest/pom.xml
----------------------------------------------------------------------
diff --git a/ingest/pom.xml b/ingest/pom.xml
index c4e0336..cd8df15 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -96,7 +96,7 @@
         <executions>
           <execution>
             <id>copy-dependencies</id>
-            <phase>process-resources</phase>
+            <phase>prepare-package</phase>
             <goals>
               <goal>copy-dependencies</goal>
             </goals>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/49f553a0/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 44996e9..69b85db 100644
--- a/pom.xml
+++ b/pom.xml
@@ -133,7 +133,7 @@
         <executions>
           <execution>
             <id>copy-dependencies</id>
-            <phase>process-resources</phase>
+            <phase>prepare-package</phase>
             <goals>
               <goal>copy-dependencies</goal>
             </goals>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/49f553a0/query/pom.xml
----------------------------------------------------------------------
diff --git a/query/pom.xml b/query/pom.xml
index eb932f9..c8192f6 100644
--- a/query/pom.xml
+++ b/query/pom.xml
@@ -99,7 +99,7 @@
         <executions>
           <execution>
             <id>copy-dependencies</id>
-            <phase>process-resources</phase>
+            <phase>prepare-package</phase>
             <goals>
               <goal>copy-dependencies</goal>
             </goals>
@@ -183,7 +183,7 @@
             <executions>
               <execution>
                 <id>copy-dependencies</id>
-                <phase>process-resources</phase>
+                <phase>prepare-package</phase>
                 <goals>
                   <goal>copy-dependencies</goal>
                 </goals>


[35/50] [abbrv] git commit: [maven-release-plugin] prepare release 1.4.4-rc3

Posted by uj...@apache.org.
[maven-release-plugin] prepare release 1.4.4-rc3


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/2807edad
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/2807edad
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/2807edad

Branch: refs/heads/master
Commit: 2807edad52dc5f229f3f118d04a6ecee70fb6244
Parents: 6b6ff98
Author: Mike Drob <md...@mdrob.com>
Authored: Thu Aug 15 12:10:08 2013 -0400
Committer: Mike Drob <md...@mdrob.com>
Committed: Thu Aug 15 12:10:08 2013 -0400

----------------------------------------------------------------------
 ingest/pom.xml    | 2 +-
 pom.xml           | 2 +-
 query-war/pom.xml | 2 +-
 query/pom.xml     | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/2807edad/ingest/pom.xml
----------------------------------------------------------------------
diff --git a/ingest/pom.xml b/ingest/pom.xml
index f2a8f77..e84278d 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.4-SNAPSHOT</version>
+    <version>1.4.4</version>
     <relativePath>../</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/2807edad/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 0598c41..87e9d71 100644
--- a/pom.xml
+++ b/pom.xml
@@ -20,7 +20,7 @@
   <parent>
         <artifactId>accumulo-examples</artifactId>
         <groupId>org.apache.accumulo</groupId>
-        <version>1.4.4-SNAPSHOT</version>
+        <version>1.4.4</version>
         <relativePath>../</relativePath>
   </parent>
   <artifactId>accumulo-wikisearch</artifactId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/2807edad/query-war/pom.xml
----------------------------------------------------------------------
diff --git a/query-war/pom.xml b/query-war/pom.xml
index 555f995..a425f7c 100644
--- a/query-war/pom.xml
+++ b/query-war/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.4-SNAPSHOT</version>
+    <version>1.4.4</version>
   </parent>
 
   <artifactId>wikisearch-query-war</artifactId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/2807edad/query/pom.xml
----------------------------------------------------------------------
diff --git a/query/pom.xml b/query/pom.xml
index 9d20269..b5ce0b0 100644
--- a/query/pom.xml
+++ b/query/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.4-SNAPSHOT</version>
+    <version>1.4.4</version>
     <relativePath>../</relativePath>
   </parent>
 


[23/50] [abbrv] git commit: ACCUMULO-665 update AndIterator to seek to the right column families

Posted by uj...@apache.org.
ACCUMULO-665 update AndIterator to seek to the right column families

git-svn-id: https://svn.apache.org/repos/asf/accumulo/branches/1.4@1358206 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/a3ca5a7b
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/a3ca5a7b
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/a3ca5a7b

Branch: refs/heads/master
Commit: a3ca5a7bb22976b67809d21e959835ce38323d08
Parents: 86aedda
Author: Eric C. Newton <ec...@apache.org>
Authored: Fri Jul 6 13:41:02 2012 +0000
Committer: Eric C. Newton <ec...@apache.org>
Committed: Fri Jul 6 13:41:02 2012 +0000

----------------------------------------------------------------------
 .../wikisearch/iterator/AndIterator.java        | 54 ++++++++------------
 1 file changed, 21 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/a3ca5a7b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
index b469625..eafc4dd 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
@@ -17,8 +17,8 @@
 package org.apache.accumulo.examples.wikisearch.iterator;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Map;
 
 import org.apache.accumulo.core.data.ArrayByteSequence;
@@ -48,8 +48,8 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
   private Text currentTerm = new Text(emptyByteArray);
   private Text currentDocID = new Text(emptyByteArray);
   private Collection<ByteSequence> seekColumnFamilies;
-  private boolean inclusive;
   private Text parentEndRow;
+  private static boolean SEEK_INCLUSIVE = true;
   
   /**
    * Used in representing a Term that is intersected on.
@@ -60,24 +60,21 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
     public Text dataLocation;
     public Text term;
     public boolean notFlag;
+    private Collection<ByteSequence> seekColumnFamilies;
     
-    public TermSource(TermSource other) {
-      this.iter = other.iter;
-      this.dataLocation = other.dataLocation;
-      this.term = other.term;
-      this.notFlag = other.notFlag;
+    private TermSource(TermSource other) {
+      this(other.iter, other.dataLocation, other.term, other.notFlag);
     }
     
     public TermSource(SortedKeyValueIterator<Key,Value> iter, Text dataLocation, Text term) {
-      this.iter = iter;
-      this.dataLocation = dataLocation;
-      this.term = term;
-      this.notFlag = false;
+      this(iter, dataLocation, term, false);
     }
     
     public TermSource(SortedKeyValueIterator<Key,Value> iter, Text dataLocation, Text term, boolean notFlag) {
       this.iter = iter;
       this.dataLocation = dataLocation;
+      ByteSequence bs = new ArrayByteSequence(dataLocation.getBytes(), 0, dataLocation.getLength());
+      this.seekColumnFamilies = Collections.singletonList(bs);
       this.term = term;
       this.notFlag = notFlag;
     }
@@ -305,8 +302,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
         if (log.isDebugEnabled()) {
           log.debug("Seeking to: " + seekKey);
         }
-        
-        ts.iter.seek(new Range(seekKey, true, null, false), seekColumnFamilies, inclusive);
+        ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, SEEK_INCLUSIVE);
         continue;
       }
       
@@ -352,7 +348,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
             log.debug("Seeking to: " + seekKey);
           }
           
-          ts.iter.seek(new Range(seekKey, true, null, false), seekColumnFamilies, inclusive);
+          ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, SEEK_INCLUSIVE);
           if (!ts.iter.hasTop()) {
             currentRow = null;
             return true;
@@ -385,8 +381,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
           if (log.isDebugEnabled()) {
             log.debug("Seeking to: " + seekKey);
           }
-          
-          ts.iter.seek(new Range(seekKey, true, null, false), seekColumnFamilies, inclusive);
+          ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, SEEK_INCLUSIVE);
           if (!ts.iter.hasTop()) {
             currentRow = null;
             return true;
@@ -407,14 +402,13 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
         if (log.isDebugEnabled()) {
           log.debug("Need to seek to the right term");
         }
-        
         Key seekKey = buildKey(currentRow, ts.dataLocation, new Text(ts.term + "\0"));// new Text(ts.term + "\0" + currentDocID));
         
         if (log.isDebugEnabled()) {
           log.debug("Seeking to: " + seekKey);
         }
         
-        ts.iter.seek(new Range(seekKey, true, null, false), seekColumnFamilies, inclusive);
+        ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, SEEK_INCLUSIVE);
         if (!ts.iter.hasTop()) {
           currentRow = null;
           return true;
@@ -451,7 +445,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
           log.debug("Seeking to: " + seekKey);
         }
         
-        ts.iter.seek(new Range(seekKey, true, null, false), seekColumnFamilies, inclusive);
+        ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, SEEK_INCLUSIVE);
         
         if (!ts.iter.hasTop()) {
           currentRow = null;
@@ -486,7 +480,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
           log.debug("Seeking to: " + seekKey);
         }
         
-        ts.iter.seek(new Range(seekKey, true, null, false), seekColumnFamilies, inclusive);
+        ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, SEEK_INCLUSIVE);
         
         continue;
       }
@@ -749,8 +743,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
     
     // Build up the array of sources that are to be intersected
     sources = new TermSource[dataLocations.length];
-    sources[0] = new TermSource(source, dataLocations[0], terms[0]);
-    for (int i = 1; i < dataLocations.length; i++) {
+    for (int i = 0; i < dataLocations.length; i++) {
       sources[i] = new TermSource(source.deepCopy(env), dataLocations[i], terms[i], notFlags[i]);
     }
     
@@ -764,10 +757,10 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
     }
     currentRow = new Text();
     currentDocID.set(emptyByteArray);
-    doSeek(range, seekColumnFamilies, inclusive);
+    doSeek(range);
   }
   
-  private void doSeek(Range range, Collection<ByteSequence> seekColumnFamilies, boolean inclusive) throws IOException {
+  private void doSeek(Range range) throws IOException {
 
     overallRange = new Range(range);
 
@@ -775,18 +768,13 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
       this.parentEndRow = range.getEndKey().getRow();
     }
     
-    this.seekColumnFamilies = seekColumnFamilies;
-    this.inclusive = inclusive;
-    
     // seek each of the sources to the right column family within the row given by key
     for (int i = 0; i < sourcesCount; i++) {
       Key sourceKey;
       Text dataLocation = (sources[i].dataLocation == null) ? nullText : sources[i].dataLocation;
-      Collection<ByteSequence> columnFamilies = new ArrayList<ByteSequence>();
-      columnFamilies.add(new ArrayByteSequence(dataLocation.getBytes(), 0, dataLocation.getLength()));
       if (range.getStartKey() != null) {
         // Build a key with the DocID if one is given
-		if (range.getStartKey().getColumnFamily() != null) {
+        if (range.getStartKey().getColumnFamily() != null) {
           sourceKey = buildKey(getPartition(range.getStartKey()), dataLocation,
               (sources[i].term == null) ? nullText : new Text(sources[i].term + "\0" + range.getStartKey().getColumnFamily()));
         } // Build a key with just the term.
@@ -796,9 +784,9 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
         }
         if (!range.isStartKeyInclusive())
           sourceKey = sourceKey.followingKey(PartialKey.ROW_COLFAM_COLQUAL);
-        sources[i].iter.seek(new Range(sourceKey, true, null, false), columnFamilies, inclusive);
+        sources[i].iter.seek(new Range(sourceKey, true, null, false), sources[i].seekColumnFamilies, SEEK_INCLUSIVE);
       } else {
-    	sources[i].iter.seek(range, columnFamilies, inclusive);
+        sources[i].iter.seek(range, sources[i].seekColumnFamilies, SEEK_INCLUSIVE);
       }
     }
     
@@ -912,7 +900,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
           this.currentRow = row;
           this.currentDocID = new Text(this.getUID(jumpKey));
           
-          doSeek(range, seekColumnFamilies, false);
+          doSeek(range);
 
           // make sure it is in the range if we have one.
           if (hasTop() && parentEndRow != null && topKey.getRow().compareTo(parentEndRow) > 0) {
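
To summarize the change outside the diff: each TermSource now computes and holds its own single-family seek set, so every seek is pinned to that term's data location instead of the shared (and possibly empty) seekColumnFamilies passed to the parent. A minimal sketch of the pattern; the class and method names here are illustrative, not from the commit:

  import java.io.IOException;
  import java.util.Collection;
  import java.util.Collections;

  import org.apache.accumulo.core.data.ArrayByteSequence;
  import org.apache.accumulo.core.data.ByteSequence;
  import org.apache.accumulo.core.data.Key;
  import org.apache.accumulo.core.data.Range;
  import org.apache.accumulo.core.data.Value;
  import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
  import org.apache.hadoop.io.Text;

  class PerTermSeek {
    // seek one term's source from startKey onward, restricted to that term's column family
    static void seekTerm(SortedKeyValueIterator<Key,Value> iter, Text dataLocation, Key startKey)
        throws IOException {
      ByteSequence cf = new ArrayByteSequence(dataLocation.getBytes(), 0, dataLocation.getLength());
      Collection<ByteSequence> families = Collections.singletonList(cf);
      iter.seek(new Range(startKey, true, null, false), families, true); // always inclusive
    }
  }

Seeking inclusively on exactly one family lets the scan stay restricted to the data that term can actually appear in, rather than inheriting whatever families and inclusiveness the caller happened to supply.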


[09/50] [abbrv] git commit: ACCUMULO-375 hybridized ingest to use bulk loading for some tables and streaming writes for others

Posted by uj...@apache.org.
ACCUMULO-375 hybridized ingest to use bulk loading for some tables and streaming writes for others

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1245142 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/ec56d2d4
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/ec56d2d4
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/ec56d2d4

Branch: refs/heads/master
Commit: ec56d2d429ebf4bd849daf71804021536f4d21ee
Parents: 0e1e67d
Author: Adam Fuchs <af...@apache.org>
Authored: Thu Feb 16 19:54:31 2012 +0000
Committer: Adam Fuchs <af...@apache.org>
Committed: Thu Feb 16 19:54:31 2012 +0000

----------------------------------------------------------------------
 .../ingest/WikipediaPartitionedMapper.java        | 18 +++++++++++++++---
 .../output/BufferingRFileRecordWriter.java        |  9 +++++----
 2 files changed, 20 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/ec56d2d4/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java
index 25bf572..7816b03 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java
@@ -26,6 +26,9 @@ import java.util.HashSet;
 import java.util.Map.Entry;
 import java.util.Set;
 
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.MultiTableBatchWriter;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.security.ColumnVisibility;
@@ -112,6 +115,7 @@ public class WikipediaPartitionedMapper extends Mapper<Text,Article,Text,Mutatio
     }
   }
   
+  MultiTableBatchWriter mtbw;
 
   @Override
   public void setup(final Context context) {
@@ -121,6 +125,14 @@ public class WikipediaPartitionedMapper extends Mapper<Text,Article,Text,Mutatio
     reverseIndexTableName = new Text(tablename + "ReverseIndex");
     metadataTableName = new Text(tablename + "Metadata");
     
+    try {
+      mtbw = WikipediaConfiguration.getConnector(conf).createMultiTableBatchWriter(10000000, 1000, 10);
+    } catch (AccumuloException e) {
+      throw new RuntimeException(e);
+    } catch (AccumuloSecurityException e) {
+      throw new RuntimeException(e);
+    }
+    
     final Text metadataTableNameFinal = metadataTableName;
     final Text indexTableNameFinal = indexTableName;
     final Text reverseIndexTableNameFinal = reverseIndexTableName;
@@ -163,7 +175,7 @@ public class WikipediaPartitionedMapper extends Mapper<Text,Article,Text,Mutatio
           Mutation m = new Mutation(key.row);
           m.put(key.colfam, key.colqual, key.cv, key.timestamp, val);
           try {
-            context.write(indexTableNameFinal, m);
+            mtbw.getBatchWriter(indexTableNameFinal.toString()).addMutation(m);
           } catch (Exception e) {
             throw new RuntimeException(e);
           }
@@ -189,7 +201,7 @@ public class WikipediaPartitionedMapper extends Mapper<Text,Article,Text,Mutatio
           Mutation m = new Mutation(key.row);
           m.put(key.colfam, key.colqual, key.cv, key.timestamp, val);
           try {
-            context.write(reverseIndexTableNameFinal, m);
+            mtbw.getBatchWriter(reverseIndexTableNameFinal.toString()).addMutation(m);
           } catch (Exception e) {
             throw new RuntimeException(e);
           }
@@ -210,7 +222,7 @@ public class WikipediaPartitionedMapper extends Mapper<Text,Article,Text,Mutatio
             Mutation m = new Mutation(key.row);
             m.put(key.colfam, key.colqual, key.cv, key.timestamp, value);
             try {
-              context.write(metadataTableNameFinal, m);
+              mtbw.getBatchWriter(metadataTableNameFinal.toString()).addMutation(m);
             } catch (Exception e) {
               throw new RuntimeException(e);
             }
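
For context before the next file, a minimal sketch of the streaming half that this commit wires into the mapper; the bulk half still goes through BufferingRFileRecordWriter below. The live Connector conn and the table name "wikiIndex" are assumptions for illustration:

  import org.apache.accumulo.core.client.Connector;
  import org.apache.accumulo.core.client.MultiTableBatchWriter;
  import org.apache.accumulo.core.data.Mutation;
  import org.apache.accumulo.core.data.Value;
  import org.apache.hadoop.io.Text;

  class StreamingSide {
    // one shared writer for all tables: 10 MB buffer, 1 s max latency, 10 write threads
    static void writeOne(Connector conn) throws Exception {
      MultiTableBatchWriter mtbw = conn.createMultiTableBatchWriter(10000000, 1000, 10);
      try {
        Mutation m = new Mutation(new Text("row"));
        m.put(new Text("colfam"), new Text("colqual"), new Value("v".getBytes()));
        mtbw.getBatchWriter("wikiIndex").addMutation(m); // buffered client-side
      } finally {
        mtbw.close(); // flushes all buffered mutations
      }
    }
  }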

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/ec56d2d4/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/BufferingRFileRecordWriter.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/BufferingRFileRecordWriter.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/BufferingRFileRecordWriter.java
index a7e7dcf..579bbe1 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/BufferingRFileRecordWriter.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/BufferingRFileRecordWriter.java
@@ -69,8 +69,8 @@ final class BufferingRFileRecordWriter extends RecordWriter<Text,Mutation> {
     if (buffer.size() == 0)
       return;
     
-    // TODO fix the filename
     String file = filenamePrefix + "/" + tablename + "/" + taskID + "_" + (fileCount++) + ".rf";
+    // TODO get the table configuration for the given table?
     FileSKVWriter writer = RFileOperations.getInstance().openWriter(file, fs, conf, acuconf);
     
     // forget locality groups for now, just write everything to the default
@@ -110,17 +110,18 @@ final class BufferingRFileRecordWriter extends RecordWriter<Text,Mutation> {
     {
       Key k = new Key(mutation.getRow(),update.getColumnFamily(),update.getColumnQualifier(),update.getColumnVisibility(),update.getTimestamp(),update.isDeleted());
       Value v = new Value(update.getValue());
+      // TODO account for object overhead
       mutationSize += k.getSize();
       mutationSize += v.getSize();
       buffer.put(k, v);
     }
     size += mutationSize;
     long bufferSize = bufferSizes.get(table);
+    
+    // TODO use a MutableLong instead
     bufferSize += mutationSize;
     bufferSizes.put(table, bufferSize);
-    
-    // TODO add object overhead size
-    
+
     while (size >= maxSize) {
       flushLargestTable();
     }


[37/50] [abbrv] git commit: Merge tag 'tags/1.4.4' into 1.4.5-SNAPSHOT

Posted by uj...@apache.org.
Merge tag 'tags/1.4.4' into 1.4.5-SNAPSHOT

Ensure that 1.4.5 and later include the 1.4.4 history


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/5cd987dd
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/5cd987dd
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/5cd987dd

Branch: refs/heads/master
Commit: 5cd987ddcab418d3b8f149fe73a713ecf15a91f8
Parents: 8082805 2807eda
Author: Christopher Tubbs <ct...@apache.org>
Authored: Wed Oct 9 16:17:20 2013 -0400
Committer: Christopher Tubbs <ct...@apache.org>
Committed: Wed Oct 9 16:17:20 2013 -0400

----------------------------------------------------------------------

----------------------------------------------------------------------



[36/50] [abbrv] git commit: ACCUMULO-1546 sliding the version to 1.4.5-SNAPSHOT since 1.4.4 has been released

Posted by uj...@apache.org.
ACCUMULO-1546 sliding the version to 1.4.5-SNAPSHOT since 1.4.4 has been released


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/80828054
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/80828054
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/80828054

Branch: refs/heads/master
Commit: 80828054f2724cc57f872a1454b760a0be01842d
Parents: a208a7c
Author: Eric Newton <ec...@apache.org>
Authored: Fri Aug 30 11:09:00 2013 -0400
Committer: Eric Newton <ec...@apache.org>
Committed: Fri Aug 30 11:09:00 2013 -0400

----------------------------------------------------------------------
 ingest/bin/ingest.sh          | 2 +-
 ingest/bin/ingest_parallel.sh | 2 +-
 ingest/pom.xml                | 2 +-
 pom.xml                       | 2 +-
 query-war/pom.xml             | 2 +-
 query/pom.xml                 | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/80828054/ingest/bin/ingest.sh
----------------------------------------------------------------------
diff --git a/ingest/bin/ingest.sh b/ingest/bin/ingest.sh
index 3eb5df4..acdcbf8 100755
--- a/ingest/bin/ingest.sh
+++ b/ingest/bin/ingest.sh
@@ -38,7 +38,7 @@ LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 #
 # Map/Reduce job
 #
-JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.4-SNAPSHOT.jar
+JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.5-SNAPSHOT.jar
 CONF=$SCRIPT_DIR/../conf/wikipedia.xml
 HDFS_DATA_DIR=$1
 export HADOOP_CLASSPATH=$CLASSPATH

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/80828054/ingest/bin/ingest_parallel.sh
----------------------------------------------------------------------
diff --git a/ingest/bin/ingest_parallel.sh b/ingest/bin/ingest_parallel.sh
index c2ef4b3..8c63ac0 100755
--- a/ingest/bin/ingest_parallel.sh
+++ b/ingest/bin/ingest_parallel.sh
@@ -38,7 +38,7 @@ LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 #
 # Map/Reduce job
 #
-JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.4-SNAPSHOT.jar
+JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.5-SNAPSHOT.jar
 CONF=$SCRIPT_DIR/../conf/wikipedia.xml
 HDFS_DATA_DIR=$1
 export HADOOP_CLASSPATH=$CLASSPATH

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/80828054/ingest/pom.xml
----------------------------------------------------------------------
diff --git a/ingest/pom.xml b/ingest/pom.xml
index f2a8f77..29b2047 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.4-SNAPSHOT</version>
+    <version>1.4.5-SNAPSHOT</version>
     <relativePath>../</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/80828054/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 1036436..1ea9a2a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -20,7 +20,7 @@
   <parent>
         <artifactId>accumulo-examples</artifactId>
         <groupId>org.apache.accumulo</groupId>
-        <version>1.4.4-SNAPSHOT</version>
+        <version>1.4.5-SNAPSHOT</version>
         <relativePath>../</relativePath>
   </parent>
   <artifactId>accumulo-wikisearch</artifactId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/80828054/query-war/pom.xml
----------------------------------------------------------------------
diff --git a/query-war/pom.xml b/query-war/pom.xml
index 555f995..485d584 100644
--- a/query-war/pom.xml
+++ b/query-war/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.4-SNAPSHOT</version>
+    <version>1.4.5-SNAPSHOT</version>
   </parent>
 
   <artifactId>wikisearch-query-war</artifactId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/80828054/query/pom.xml
----------------------------------------------------------------------
diff --git a/query/pom.xml b/query/pom.xml
index 9d20269..05b1e46 100644
--- a/query/pom.xml
+++ b/query/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.4-SNAPSHOT</version>
+    <version>1.4.5-SNAPSHOT</version>
     <relativePath>../</relativePath>
   </parent>
 


[25/50] [abbrv] git commit: ACCUMULO-614 slide version in 1.4 branch now that 1.4.1 has been released

Posted by uj...@apache.org.
ACCUMULO-614 slide version in 1.4 branch now that 1.4.1 has been released

git-svn-id: https://svn.apache.org/repos/asf/accumulo/branches/1.4@1358245 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/873f98ce
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/873f98ce
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/873f98ce

Branch: refs/heads/master
Commit: 873f98cecd8c60cd140179274373d92770ff1daf
Parents: a286a87
Author: Eric C. Newton <ec...@apache.org>
Authored: Fri Jul 6 14:42:59 2012 +0000
Committer: Eric C. Newton <ec...@apache.org>
Committed: Fri Jul 6 14:42:59 2012 +0000

----------------------------------------------------------------------
 ingest/bin/ingest.sh          | 2 +-
 ingest/bin/ingest_parallel.sh | 2 +-
 ingest/pom.xml                | 2 +-
 pom.xml                       | 2 +-
 query-war/pom.xml             | 2 +-
 query/pom.xml                 | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/873f98ce/ingest/bin/ingest.sh
----------------------------------------------------------------------
diff --git a/ingest/bin/ingest.sh b/ingest/bin/ingest.sh
index 39afff0..16d3cf5 100755
--- a/ingest/bin/ingest.sh
+++ b/ingest/bin/ingest.sh
@@ -38,7 +38,7 @@ LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 #
 # Map/Reduce job
 #
-JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.1-SNAPSHOT.jar
+JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.2-SNAPSHOT.jar
 CONF=$SCRIPT_DIR/../conf/wikipedia.xml
 HDFS_DATA_DIR=$1
 export HADOOP_CLASSPATH=$CLASSPATH

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/873f98ce/ingest/bin/ingest_parallel.sh
----------------------------------------------------------------------
diff --git a/ingest/bin/ingest_parallel.sh b/ingest/bin/ingest_parallel.sh
index 2407cef..f83bf0b 100755
--- a/ingest/bin/ingest_parallel.sh
+++ b/ingest/bin/ingest_parallel.sh
@@ -38,7 +38,7 @@ LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 #
 # Map/Reduce job
 #
-JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.1-SNAPSHOT.jar
+JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.2-SNAPSHOT.jar
 CONF=$SCRIPT_DIR/../conf/wikipedia.xml
 HDFS_DATA_DIR=$1
 export HADOOP_CLASSPATH=$CLASSPATH

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/873f98ce/ingest/pom.xml
----------------------------------------------------------------------
diff --git a/ingest/pom.xml b/ingest/pom.xml
index 3a4e4fd..0dd3aa5 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.1-SNAPSHOT</version>
+    <version>1.4.2-SNAPSHOT</version>
     <relativePath>../</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/873f98ce/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 5859604..c517589 100644
--- a/pom.xml
+++ b/pom.xml
@@ -20,7 +20,7 @@
   <parent>
         <artifactId>accumulo-examples</artifactId>
         <groupId>org.apache.accumulo</groupId>
-        <version>1.4.1-SNAPSHOT</version>
+        <version>1.4.2-SNAPSHOT</version>
         <relativePath>../</relativePath>
   </parent>
   <artifactId>accumulo-wikisearch</artifactId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/873f98ce/query-war/pom.xml
----------------------------------------------------------------------
diff --git a/query-war/pom.xml b/query-war/pom.xml
index 50ff054..1e9fd2c 100644
--- a/query-war/pom.xml
+++ b/query-war/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.1-SNAPSHOT</version>
+    <version>1.4.2-SNAPSHOT</version>
   </parent>
 
   <artifactId>wikisearch-query-war</artifactId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/873f98ce/query/pom.xml
----------------------------------------------------------------------
diff --git a/query/pom.xml b/query/pom.xml
index 1c51744..488c897 100644
--- a/query/pom.xml
+++ b/query/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.1-SNAPSHOT</version>
+    <version>1.4.2-SNAPSHOT</version>
     <relativePath>../</relativePath>
   </parent>
 


[34/50] [abbrv] git commit: ACCUMULO-1546 manually removing snapshot version references

Posted by uj...@apache.org.
ACCUMULO-1546 manually removing snapshot version references


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/6b6ff987
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/6b6ff987
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/6b6ff987

Branch: refs/heads/master
Commit: 6b6ff987c8ff50383ae7873671e63f1a84b34b29
Parents: fb1d6c5
Author: Mike Drob <md...@mdrob.com>
Authored: Sun Aug 11 12:42:50 2013 -0400
Committer: Mike Drob <md...@mdrob.com>
Committed: Thu Aug 15 01:37:43 2013 -0400

----------------------------------------------------------------------
 ingest/bin/ingest.sh          | 2 +-
 ingest/bin/ingest_parallel.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/6b6ff987/ingest/bin/ingest.sh
----------------------------------------------------------------------
diff --git a/ingest/bin/ingest.sh b/ingest/bin/ingest.sh
index 3eb5df4..aff15d3 100755
--- a/ingest/bin/ingest.sh
+++ b/ingest/bin/ingest.sh
@@ -38,7 +38,7 @@ LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 #
 # Map/Reduce job
 #
-JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.4-SNAPSHOT.jar
+JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.4.jar
 CONF=$SCRIPT_DIR/../conf/wikipedia.xml
 HDFS_DATA_DIR=$1
 export HADOOP_CLASSPATH=$CLASSPATH

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/6b6ff987/ingest/bin/ingest_parallel.sh
----------------------------------------------------------------------
diff --git a/ingest/bin/ingest_parallel.sh b/ingest/bin/ingest_parallel.sh
index c2ef4b3..2f77520 100755
--- a/ingest/bin/ingest_parallel.sh
+++ b/ingest/bin/ingest_parallel.sh
@@ -38,7 +38,7 @@ LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 #
 # Map/Reduce job
 #
-JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.4-SNAPSHOT.jar
+JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.4.jar
 CONF=$SCRIPT_DIR/../conf/wikipedia.xml
 HDFS_DATA_DIR=$1
 export HADOOP_CLASSPATH=$CLASSPATH


[38/50] [abbrv] git commit: ACCUMULO-286 added ContextFactory calls for creating task attempt and map contexts in tests

Posted by uj...@apache.org.
ACCUMULO-286 added ContextFactory calls for creating task attempt and map contexts in tests

git-svn-id: https://svn.apache.org/repos/asf/accumulo/trunk@1328104 13f79535-47bb-0310-9956-ffa450edef68
(cherry picked from commit 0680b04bf03e2d6ad19ae3c368f6cb23f4e30056)

Reason: Testing
Author: Billie Rinaldi <bi...@apache.org>
Ref: ACCUMULO-1792

Signed-off-by: Eric Newton <er...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/0c429f98
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/0c429f98
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/0c429f98

Branch: refs/heads/master
Commit: 0c429f986b9e106cf3598cd37726bb40470ffa95
Parents: f0b42c7
Author: Billie Rinaldi <bi...@apache.org>
Authored: Thu Apr 19 20:22:16 2012 +0000
Committer: Eric Newton <er...@gmail.com>
Committed: Mon Nov 25 16:06:42 2013 -0500

----------------------------------------------------------------------
 .../wikisearch/ingest/WikipediaMapperTest.java  |  9 +++-----
 .../reader/AggregatingRecordReaderTest.java     | 24 +++++++++++---------
 .../wikisearch/logic/TestQueryLogic.java        |  9 ++++----
 3 files changed, 20 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0c429f98/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java
----------------------------------------------------------------------
diff --git a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java
index a924aee..c659ec4 100644
--- a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java
+++ b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java
@@ -34,8 +34,7 @@ import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
-import org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper;
+import org.apache.accumulo.core.util.ContextFactory;
 import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
@@ -47,7 +46,6 @@ import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.OutputCommitter;
 import org.apache.hadoop.mapreduce.RecordWriter;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
 import org.apache.hadoop.mapreduce.lib.input.FileSplit;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
 import org.junit.Before;
@@ -118,8 +116,7 @@ public class WikipediaMapperTest {
     writerMap.put(new Text(INDEX_TABLE_NAME), c.createBatchWriter(INDEX_TABLE_NAME, 1000L, 1000L, 1));
     writerMap.put(new Text(RINDEX_TABLE_NAME), c.createBatchWriter(RINDEX_TABLE_NAME, 1000L, 1000L, 1));
     
-    TaskAttemptID id = new TaskAttemptID();
-    TaskAttemptContext context = new TaskAttemptContext(conf, id);
+    TaskAttemptContext context = ContextFactory.createTaskAttemptContext(conf);
     
     RawLocalFileSystem fs = new RawLocalFileSystem();
     fs.setConf(conf);
@@ -141,7 +138,7 @@ public class WikipediaMapperTest {
     WikipediaMapper mapper = new WikipediaMapper();
     
     // Load data into Mock Accumulo
-    Mapper<LongWritable,Text,Text,Mutation>.Context con = mapper.new Context(conf, id, rr, rw, oc, sr, split);
+    Mapper<LongWritable,Text,Text,Mutation>.Context con = ContextFactory.createMapContext(mapper, context, rr, rw, oc, sr, split);
     mapper.run(con);
     
     // Flush and close record writers.
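
The pattern being introduced, sketched on its own (presumably to cope with TaskAttemptContext being directly constructible on Hadoop 1 but not on Hadoop 2; only a Configuration is required):

  import org.apache.accumulo.core.util.ContextFactory;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.mapreduce.TaskAttemptContext;

  class ContextFactoryDemo {
    static TaskAttemptContext newContext() {
      // the factory resolves an appropriate TaskAttemptContext implementation at
      // runtime, so test code no longer references a version-specific constructor
      return ContextFactory.createTaskAttemptContext(new Configuration());
    }
  }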

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0c429f98/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
----------------------------------------------------------------------
diff --git a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
index c1cb263..c842da7 100644
--- a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
+++ b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReaderTest.java
@@ -16,7 +16,10 @@
  */
 package org.apache.accumulo.examples.wikisearch.reader;
 
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import java.io.File;
 import java.io.FileWriter;
@@ -28,13 +31,12 @@ import javax.xml.xpath.XPath;
 import javax.xml.xpath.XPathExpression;
 import javax.xml.xpath.XPathFactory;
 
+import org.apache.accumulo.core.util.ContextFactory;
 import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
-import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
 import org.apache.hadoop.mapreduce.lib.input.FileSplit;
 import org.junit.Before;
 import org.junit.Test;
@@ -95,7 +97,7 @@ public class AggregatingRecordReaderTest {
     conf.set(AggregatingRecordReader.START_TOKEN, "<doc");
     conf.set(AggregatingRecordReader.END_TOKEN, "</doc>");
     conf.set(AggregatingRecordReader.RETURN_PARTIAL_MATCHES, Boolean.toString(true));
-    ctx = new TaskAttemptContext(conf, new TaskAttemptID());
+    ctx = ContextFactory.createTaskAttemptContext(conf);
     XPath xp = xpFactory.newXPath();
     EXPR_A = xp.compile("/doc/a");
     EXPR_B = xp.compile("/doc/b");
@@ -141,7 +143,7 @@ public class AggregatingRecordReaderTest {
     
     // Create FileSplit
     Path p = new Path(f.toURI().toString());
-    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null),0);
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
     AggregatingRecordReader reader = new AggregatingRecordReader();
     try {
       // Clear the values for BEGIN and STOP TOKEN
@@ -163,7 +165,7 @@ public class AggregatingRecordReaderTest {
     
     // Create FileSplit
     Path p = new Path(f.toURI().toString());
-    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null),0);
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
     
     // Initialize the RecordReader
     AggregatingRecordReader reader = new AggregatingRecordReader();
@@ -184,7 +186,7 @@ public class AggregatingRecordReaderTest {
     
     // Create FileSplit
     Path p = new Path(f.toURI().toString());
-    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null),0);
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
     
     // Initialize the RecordReader
     AggregatingRecordReader reader = new AggregatingRecordReader();
@@ -202,7 +204,7 @@ public class AggregatingRecordReaderTest {
     
     // Create FileSplit
     Path p = new Path(f.toURI().toString());
-    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null),0);
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
     
     // Initialize the RecordReader
     AggregatingRecordReader reader = new AggregatingRecordReader();
@@ -220,7 +222,7 @@ public class AggregatingRecordReaderTest {
     
     // Create FileSplit
     Path p = new Path(f.toURI().toString());
-    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null),0);
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
     
     // Initialize the RecordReader
     AggregatingRecordReader reader = new AggregatingRecordReader();
@@ -245,7 +247,7 @@ public class AggregatingRecordReaderTest {
     
     // Create FileSplit
     Path p = new Path(f.toURI().toString());
-    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null),0);
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
     
     // Initialize the RecordReader
     AggregatingRecordReader reader = new AggregatingRecordReader();
@@ -264,7 +266,7 @@ public class AggregatingRecordReaderTest {
     File f = createFile(xml5);
     // Create FileSplit
     Path p = new Path(f.toURI().toString());
-    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null),0);
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
     
     // Initialize the RecordReader
     AggregatingRecordReader reader = new AggregatingRecordReader();

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0c429f98/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
----------------------------------------------------------------------
diff --git a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java b/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
index 4b7aaee..938f01b 100644
--- a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
+++ b/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
@@ -36,6 +36,7 @@ import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.ContextFactory;
 import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
 import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
 import org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper;
@@ -53,7 +54,6 @@ import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.OutputCommitter;
 import org.apache.hadoop.mapreduce.RecordWriter;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
 import org.apache.hadoop.mapreduce.lib.input.FileSplit;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
 import org.apache.log4j.Level;
@@ -125,8 +125,7 @@ public class TestQueryLogic {
       writerMap.put(new Text(table), c.createBatchWriter(table, 1000L, 1000L, 1));
     }
     
-    TaskAttemptID id = new TaskAttemptID();
-    TaskAttemptContext context = new TaskAttemptContext(conf, id);
+    TaskAttemptContext context = ContextFactory.createTaskAttemptContext(conf);
     
     RawLocalFileSystem fs = new RawLocalFileSystem();
     fs.setConf(conf);
@@ -137,7 +136,7 @@ public class TestQueryLogic {
     Path tmpFile = new Path(data.getAbsolutePath());
     
     // Setup the Mapper
-    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null),0);
+    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null), 0);
     AggregatingRecordReader rr = new AggregatingRecordReader();
     Path ocPath = new Path(tmpFile, "oc");
     OutputCommitter oc = new FileOutputCommitter(ocPath, context);
@@ -148,7 +147,7 @@ public class TestQueryLogic {
     WikipediaMapper mapper = new WikipediaMapper();
     
     // Load data into Mock Accumulo
-    Mapper<LongWritable,Text,Text,Mutation>.Context con = mapper.new Context(conf, id, rr, rw, oc, sr, split);
+    Mapper<LongWritable,Text,Text,Mutation>.Context con = ContextFactory.createMapContext(mapper, context, rr, rw, oc, sr, split);
     mapper.run(con);
     
     // Flush and close record writers.

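For context on the change above: in Hadoop 0.23/2, TaskAttemptContext and Mapper.Context became interfaces, so the direct constructions removed here (new TaskAttemptContext(conf, id) and mapper.new Context(...)) no longer compile. Routing through a factory that locates the right implementation via reflection is one way to stay source-compatible with both Hadoop lines; the sketch below only illustrates the idea and is not the actual org.apache.accumulo.core.util.ContextFactory.

    import java.lang.reflect.Constructor;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;
    import org.apache.hadoop.mapreduce.TaskAttemptID;

    public class PortableContextFactory {
      // On Hadoop 1, TaskAttemptContext is a concrete class; on Hadoop 2 the
      // same (Configuration, TaskAttemptID) constructor lives on
      // TaskAttemptContextImpl. Reflection picks whichever class is present.
      public static TaskAttemptContext createTaskAttemptContext(Configuration conf) {
        String[] candidates = {
            "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl",
            "org.apache.hadoop.mapreduce.TaskAttemptContext"};
        for (String name : candidates) {
          try {
            Class<?> clazz = Class.forName(name);
            Constructor<?> ctor = clazz.getConstructor(Configuration.class, TaskAttemptID.class);
            return (TaskAttemptContext) ctor.newInstance(conf, new TaskAttemptID());
          } catch (Exception e) {
            // class or constructor missing on this Hadoop version; try the next candidate
          }
        }
        throw new IllegalStateException("no usable TaskAttemptContext implementation found");
      }
    }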

[40/50] [abbrv] git commit: ACCUMULO-564 changes for 0.23 compile compatibility

Posted by uj...@apache.org.
ACCUMULO-564 changes for 0.23 compile compatibility

git-svn-id: https://svn.apache.org/repos/asf/accumulo/trunk/examples/wikisearch@1332674 13f79535-47bb-0310-9956-ffa450edef68
(cherry picked from commit 201ebbadb38ba5f71e870ea28c6e0120a027c8e3)

Reason: Hadoop2 Compat
Author: Billie Rinaldi <bi...@apache.org>
Ref: ACCUMULO-1977


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/c9f213e9
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/c9f213e9
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/c9f213e9

Branch: refs/heads/master
Commit: c9f213e99defefce72146caf68f41ad0bf343aa7
Parents: 0c429f9
Author: Billie Rinaldi <bi...@apache.org>
Authored: Tue May 1 14:35:39 2012 +0000
Committer: Sean Busbey <bu...@clouderagovt.com>
Committed: Tue Dec 10 14:06:40 2013 -0600

----------------------------------------------------------------------
 .../wikisearch/ingest/WikipediaIngester.java    |  10 +-
 .../ingest/StandaloneStatusReporter.java        |  70 --------
 .../wikisearch/ingest/WikipediaMapperTest.java  | 163 -------------------
 .../logic/StandaloneStatusReporter.java         |   4 +
 .../wikisearch/logic/TestQueryLogic.java        |  31 ++--
 5 files changed, 28 insertions(+), 250 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/c9f213e9/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
index 50415a7..d4fa1c6 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
@@ -66,8 +66,8 @@ public class WikipediaIngester extends Configured implements Tool {
     System.exit(res);
   }
   
-  private void createTables(TableOperations tops, String tableName) throws AccumuloException, AccumuloSecurityException, TableNotFoundException,
-      TableExistsException {
+  public static void createTables(TableOperations tops, String tableName, boolean configureLocalityGroups) throws AccumuloException, AccumuloSecurityException,
+      TableNotFoundException, TableExistsException {
     // Create the shard table
     String indexTableName = tableName + "Index";
     String reverseIndexTableName = tableName + "ReverseIndex";
@@ -94,7 +94,9 @@ public class WikipediaIngester extends Configured implements Tool {
       }
       
       // Set the locality group for the full content column family
-      tops.setLocalityGroups(tableName, Collections.singletonMap("WikipediaDocuments", Collections.singleton(new Text(WikipediaMapper.DOCUMENT_COLUMN_FAMILY))));
+      if (configureLocalityGroups)
+        tops.setLocalityGroups(tableName,
+            Collections.singletonMap("WikipediaDocuments", Collections.singleton(new Text(WikipediaMapper.DOCUMENT_COLUMN_FAMILY))));
       
     }
     
@@ -143,7 +145,7 @@ public class WikipediaIngester extends Configured implements Tool {
     
     TableOperations tops = connector.tableOperations();
     
-    createTables(tops, tablename);
+    createTables(tops, tablename, true);
     
     configureJob(job);
     

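Making createTables public and static lets tests reuse the ingester's exact table setup instead of duplicating it, and the new configureLocalityGroups flag lets callers skip the setLocalityGroups call; the test changes later in this series pass false when running against a MockInstance. A minimal usage sketch mirroring those tests (instance and credentials are illustrative):

    // Stand up mock tables the same way the ingester does, minus the
    // locality-group configuration (hence the trailing false).
    MockInstance i = new MockInstance();
    Connector c = i.getConnector("root", "");
    WikipediaIngester.createTables(c.tableOperations(), "wiki", false);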
http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/c9f213e9/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java
----------------------------------------------------------------------
diff --git a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java
deleted file mode 100644
index 6af1e9b..0000000
--- a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.ingest;
-
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.Counters;
-import org.apache.hadoop.mapreduce.StatusReporter;
-
-public class StandaloneStatusReporter extends StatusReporter {
-  
-  private Counters c = new Counters();
-  
-  private long filesProcessed = 0;
-  private long recordsProcessed = 0;
-  
-  public Counters getCounters() {
-    return c;
-  }
-  
-  @Override
-  public Counter getCounter(Enum<?> name) {
-    return c.findCounter(name);
-  }
-  
-  @Override
-  public Counter getCounter(String group, String name) {
-    return c.findCounter(group, name);
-  }
-  
-  @Override
-  public void progress() {
-    // do nothing
-  }
-  
-  @Override
-  public void setStatus(String status) {
-    // do nothing
-  }
-  
-  public long getFilesProcessed() {
-    return filesProcessed;
-  }
-  
-  public long getRecordsProcessed() {
-    return recordsProcessed;
-  }
-  
-  public void incrementFilesProcessed() {
-    filesProcessed++;
-    recordsProcessed = 0;
-  }
-  
-  public void incrementRecordsProcessed() {
-    recordsProcessed++;
-  }
-}

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/c9f213e9/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java
----------------------------------------------------------------------
diff --git a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java
deleted file mode 100644
index c659ec4..0000000
--- a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.ingest;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URL;
-import java.util.HashMap;
-import java.util.Map.Entry;
-
-import junit.framework.Assert;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.MutationsRejectedException;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.util.ContextFactory;
-import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RawLocalFileSystem;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
-import org.junit.Before;
-
-/**
- * Load some data into mock accumulo
- */
-public class WikipediaMapperTest {
-  
-  private static final String METADATA_TABLE_NAME = "wikiMetadata";
-  
-  private static final String TABLE_NAME = "wiki";
-  
-  private static final String INDEX_TABLE_NAME = "wikiIndex";
-  
-  private static final String RINDEX_TABLE_NAME = "wikiReverseIndex";
-  
-  private class MockAccumuloRecordWriter extends RecordWriter<Text,Mutation> {
-    @Override
-    public void write(Text key, Mutation value) throws IOException, InterruptedException {
-      try {
-        writerMap.get(key).addMutation(value);
-      } catch (MutationsRejectedException e) {
-        throw new IOException("Error adding mutation", e);
-      }
-    }
-    
-    @Override
-    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
-      try {
-        for (BatchWriter w : writerMap.values()) {
-          w.flush();
-          w.close();
-        }
-      } catch (MutationsRejectedException e) {
-        throw new IOException("Error closing Batch Writer", e);
-      }
-    }
-    
-  }
-  
-  private Connector c = null;
-  private Configuration conf = new Configuration();
-  private HashMap<Text,BatchWriter> writerMap = new HashMap<Text,BatchWriter>();
-  
-  @Before
-  public void setup() throws Exception {
-    
-    conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
-    conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
-    conf.set(WikipediaConfiguration.TABLE_NAME, TABLE_NAME);
-    conf.set(WikipediaConfiguration.NUM_PARTITIONS, "1");
-    conf.set(WikipediaConfiguration.NUM_GROUPS, "1");
-    
-    MockInstance i = new MockInstance();
-    c = i.getConnector("root", "pass");
-    c.tableOperations().delete(METADATA_TABLE_NAME);
-    c.tableOperations().delete(TABLE_NAME);
-    c.tableOperations().delete(INDEX_TABLE_NAME);
-    c.tableOperations().delete(RINDEX_TABLE_NAME);
-    c.tableOperations().create(METADATA_TABLE_NAME);
-    c.tableOperations().create(TABLE_NAME);
-    c.tableOperations().create(INDEX_TABLE_NAME);
-    c.tableOperations().create(RINDEX_TABLE_NAME);
-    
-    writerMap.put(new Text(METADATA_TABLE_NAME), c.createBatchWriter(METADATA_TABLE_NAME, 1000L, 1000L, 1));
-    writerMap.put(new Text(TABLE_NAME), c.createBatchWriter(TABLE_NAME, 1000L, 1000L, 1));
-    writerMap.put(new Text(INDEX_TABLE_NAME), c.createBatchWriter(INDEX_TABLE_NAME, 1000L, 1000L, 1));
-    writerMap.put(new Text(RINDEX_TABLE_NAME), c.createBatchWriter(RINDEX_TABLE_NAME, 1000L, 1000L, 1));
-    
-    TaskAttemptContext context = ContextFactory.createTaskAttemptContext(conf);
-    
-    RawLocalFileSystem fs = new RawLocalFileSystem();
-    fs.setConf(conf);
-    
-    URL url = ClassLoader.getSystemResource("enwiki-20110901-001.xml");
-    Assert.assertNotNull(url);
-    File data = new File(url.toURI());
-    Path tmpFile = new Path(data.getAbsolutePath());
-    
-    // Setup the Mapper
-    InputSplit split = new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null);
-    AggregatingRecordReader rr = new AggregatingRecordReader();
-    Path ocPath = new Path(tmpFile, "oc");
-    OutputCommitter oc = new FileOutputCommitter(ocPath, context);
-    fs.deleteOnExit(ocPath);
-    StandaloneStatusReporter sr = new StandaloneStatusReporter();
-    rr.initialize(split, context);
-    MockAccumuloRecordWriter rw = new MockAccumuloRecordWriter();
-    WikipediaMapper mapper = new WikipediaMapper();
-    
-    // Load data into Mock Accumulo
-    Mapper<LongWritable,Text,Text,Mutation>.Context con = ContextFactory.createMapContext(mapper, context, rr, rw, oc, sr, split);
-    mapper.run(con);
-    
-    // Flush and close record writers.
-    rw.close(context);
-    
-  }
-  
-  private void debugQuery(String tableName) throws Exception {
-    Scanner s = c.createScanner(tableName, new Authorizations("all"));
-    Range r = new Range();
-    s.setRange(r);
-    for (Entry<Key,Value> entry : s)
-      System.out.println(entry.getKey().toString() + " " + entry.getValue().toString());
-  }
-  
-  public void testViewAllData() throws Exception {
-    debugQuery(METADATA_TABLE_NAME);
-    debugQuery(TABLE_NAME);
-    debugQuery(INDEX_TABLE_NAME);
-    debugQuery(RINDEX_TABLE_NAME);
-  }
-}

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/c9f213e9/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/StandaloneStatusReporter.java
----------------------------------------------------------------------
diff --git a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/StandaloneStatusReporter.java b/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/StandaloneStatusReporter.java
index 35743b3..a3b90d2 100644
--- a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/StandaloneStatusReporter.java
+++ b/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/StandaloneStatusReporter.java
@@ -67,4 +67,8 @@ public class StandaloneStatusReporter extends StatusReporter {
   public void incrementRecordsProcessed() {
     recordsProcessed++;
   }
+  
+  public float getProgress() {
+    return 0;
+  }
 }

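A note on the getProgress() addition above: Hadoop 0.23/2 makes getProgress() abstract on StatusReporter, while the method does not exist on Hadoop 1. Adding it without an @Override annotation is what keeps the class compiling against both lines: on Hadoop 2 it satisfies the new abstract method, and on Hadoop 1 it is simply an extra method. The reporter is the StatusReporter handed to the map context in the tests, e.g.:

    // from the test setup elsewhere in this series
    StandaloneStatusReporter sr = new StandaloneStatusReporter();
    Mapper<LongWritable,Text,Text,Mutation>.Context con =
        ContextFactory.createMapContext(mapper, context, rr, rw, oc, sr, split);
    mapper.run(con);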
http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/c9f213e9/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
----------------------------------------------------------------------
diff --git a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java b/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
index 938f01b..9079d79 100644
--- a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
+++ b/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
@@ -16,6 +16,8 @@
  */
 package org.apache.accumulo.examples.wikisearch.logic;
 
+import static org.junit.Assert.assertEquals;
+
 import java.io.File;
 import java.io.IOException;
 import java.net.URL;
@@ -38,12 +40,12 @@ import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.util.ContextFactory;
 import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
+import org.apache.accumulo.examples.wikisearch.ingest.WikipediaIngester;
 import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
 import org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper;
 import org.apache.accumulo.examples.wikisearch.parser.RangeCalculator;
 import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
 import org.apache.accumulo.examples.wikisearch.sample.Document;
-import org.apache.accumulo.examples.wikisearch.sample.Field;
 import org.apache.accumulo.examples.wikisearch.sample.Results;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
@@ -117,11 +119,8 @@ public class TestQueryLogic {
     
     MockInstance i = new MockInstance();
     c = i.getConnector("root", "");
+    WikipediaIngester.createTables(c.tableOperations(), TABLE_NAME, false);
     for (String table : TABLE_NAMES) {
-      try {
-        c.tableOperations().delete(table);
-      } catch (Exception ex) {}
-      c.tableOperations().create(table);
       writerMap.put(new Text(table), c.createBatchWriter(table, 1000L, 1000L, 1));
     }
     
@@ -162,7 +161,7 @@ public class TestQueryLogic {
   }
   
   void debugQuery(String tableName) throws Exception {
-    Scanner s = c.createScanner(tableName, new Authorizations());
+    Scanner s = c.createScanner(tableName, new Authorizations("all"));
     Range r = new Range();
     s.setRange(r);
     for (Entry<Key,Value> entry : s)
@@ -170,17 +169,23 @@ public class TestQueryLogic {
   }
   
   @Test
-  public void testTitle() {
+  public void testTitle() throws Exception {
     Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.OFF);
     Logger.getLogger(RangeCalculator.class).setLevel(Level.OFF);
     List<String> auths = new ArrayList<String>();
     auths.add("enwiki");
-    Results results = table.runQuery(c, auths, "TITLE == 'afghanistanhistory'", null, null, null);
-    for (Document doc : results.getResults()) {
-      System.out.println("id: " + doc.getId());
-      for (Field field : doc.getFields())
-        System.out.println(field.getFieldName() + " -> " + field.getFieldValue());
-    }
+    
+    Results results = table.runQuery(c, auths, "TITLE == 'asphalt' or TITLE == 'abacus' or TITLE == 'acid' or TITLE == 'acronym'", null, null, null);
+    List<Document> docs = results.getResults();
+    assertEquals(4, docs.size());
+    
+    /*
+     * debugQuery(METADATA_TABLE_NAME); debugQuery(TABLE_NAME); debugQuery(INDEX_TABLE_NAME); debugQuery(RINDEX_TABLE_NAME);
+     * 
+     * results = table.runQuery(c, auths, "TEXT == 'abacus'", null, null, null); docs = results.getResults(); assertEquals(4, docs.size()); for (Document doc :
+     * docs) { System.out.println("id: " + doc.getId()); for (Field field : doc.getFields()) System.out.println(field.getFieldName() + " -> " +
+     * field.getFieldValue()); }
+     */
   }
   
 }

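The rewritten testTitle above also shows the query syntax the logic accepts: JEXL-style boolean expressions over field comparisons (the query classes import org.apache.commons.jexl2.parser). A condensed sketch of the assertion pattern, taken from the test:

    List<String> auths = new ArrayList<String>();
    auths.add("enwiki");  // wiki authorization used by the test data
    Results results = table.runQuery(c, auths,
        "TITLE == 'asphalt' or TITLE == 'abacus' or TITLE == 'acid' or TITLE == 'acronym'",
        null, null, null);
    assertEquals(4, results.getResults().size());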

[41/50] [abbrv] git commit: ACCUMULO-564: turn TestQueryLogic unit test into a functioning unit test by completing the configuration

Posted by uj...@apache.org.
ACCUMULO-564: turn TestQueryLogic unit test into a functioning unit test by completing the configuration

git-svn-id: https://svn.apache.org/repos/asf/accumulo/trunk/examples/wikisearch@1332755 13f79535-47bb-0310-9956-ffa450edef68
(cherry picked from commit 7bab36a5fd968b287a4ebc1fff65bd2dcc79e8a3)

Reason: Hadoop2 compat
Author: Eric C. Newton <ec...@apache.org>
Ref: ACCUMULO-1977


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/e1585905
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/e1585905
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/e1585905

Branch: refs/heads/master
Commit: e158590545f1ee1a4a14fb63cc6b9d69066b96aa
Parents: c9f213e
Author: Eric C. Newton <ec...@apache.org>
Authored: Tue May 1 17:15:24 2012 +0000
Committer: Sean Busbey <bu...@clouderagovt.com>
Committed: Tue Dec 10 14:13:49 2013 -0600

----------------------------------------------------------------------
 .../examples/wikisearch/logic/TestQueryLogic.java | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e1585905/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
----------------------------------------------------------------------
diff --git a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java b/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
index 9079d79..24e7379 100644
--- a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
+++ b/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
@@ -22,6 +22,7 @@ import java.io.File;
 import java.io.IOException;
 import java.net.URL;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map.Entry;
@@ -46,6 +47,7 @@ import org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper;
 import org.apache.accumulo.examples.wikisearch.parser.RangeCalculator;
 import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
 import org.apache.accumulo.examples.wikisearch.sample.Document;
+import org.apache.accumulo.examples.wikisearch.sample.Field;
 import org.apache.accumulo.examples.wikisearch.sample.Results;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
@@ -158,6 +160,7 @@ public class TestQueryLogic {
     table.setIndexTableName(INDEX_TABLE_NAME);
     table.setReverseIndexTableName(RINDEX_TABLE_NAME);
     table.setUseReadAheadIterator(false);
+    table.setUnevaluatedFields(Collections.singletonList("TEXT"));
   }
   
   void debugQuery(String tableName) throws Exception {
@@ -179,13 +182,14 @@ public class TestQueryLogic {
     List<Document> docs = results.getResults();
     assertEquals(4, docs.size());
     
-    /*
-     * debugQuery(METADATA_TABLE_NAME); debugQuery(TABLE_NAME); debugQuery(INDEX_TABLE_NAME); debugQuery(RINDEX_TABLE_NAME);
-     * 
-     * results = table.runQuery(c, auths, "TEXT == 'abacus'", null, null, null); docs = results.getResults(); assertEquals(4, docs.size()); for (Document doc :
-     * docs) { System.out.println("id: " + doc.getId()); for (Field field : doc.getFields()) System.out.println(field.getFieldName() + " -> " +
-     * field.getFieldValue()); }
-     */
+    results = table.runQuery(c, auths, "TEXT == 'abacus'", null, null, null);
+    docs = results.getResults();
+    assertEquals(1, docs.size());
+    for (Document doc : docs) {
+      System.out.println("id: " + doc.getId());
+      for (Field field : doc.getFields())
+        System.out.println(field.getFieldName() + " -> " + field.getFieldValue());
+    }
   }
   
 }

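The configuration line that makes the TEXT query work is setUnevaluatedFields: it marks fields that are matched via the index but not re-evaluated against the event (presumably because the full article text is indexed rather than stored whole on the event; that reading is inferred from the name, not stated in this commit). The completed setup from the test:

    table.setTableName(TABLE_NAME);
    table.setIndexTableName(INDEX_TABLE_NAME);
    table.setReverseIndexTableName(RINDEX_TABLE_NAME);
    table.setUseReadAheadIterator(false);
    // TEXT hits resolve through the index; the field is not evaluated
    // event-side (inferred, see note above)
    table.setUnevaluatedFields(Collections.singletonList("TEXT"));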

[14/50] [abbrv] git commit: ACCUMULO-441 ACCUMULO-411 merged javadocs fixes from trunk to 1.4 and added better links

Posted by uj...@apache.org.
ACCUMULO-441 ACCUMULO-411 merged javadocs fixes from trunk to 1.4 and added better links

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1297265 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/0266eaed
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/0266eaed
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/0266eaed

Branch: refs/heads/master
Commit: 0266eaed00e090933f32ccb3c8405ca5af7df5df
Parents: e24faaf
Author: Billie Rinaldi <bi...@apache.org>
Authored: Mon Mar 5 22:49:35 2012 +0000
Committer: Billie Rinaldi <bi...@apache.org>
Committed: Mon Mar 5 22:49:35 2012 +0000

----------------------------------------------------------------------
 .../wikisearch/ingest/WikipediaMapper.java      |  2 +-
 .../examples/wikisearch/util/TextUtil.java      |  4 +--
 .../iterator/AbstractEvaluatingIterator.java    |  4 +--
 .../wikisearch/iterator/AndIterator.java        | 26 +++++++++----------
 .../wikisearch/iterator/OrIterator.java         | 12 ++++-----
 .../wikisearch/logic/AbstractQueryLogic.java    |  9 -------
 .../parser/FieldIndexQueryReWriter.java         | 27 +++++++-------------
 .../wikisearch/parser/QueryEvaluator.java       |  1 -
 .../wikisearch/parser/RangeCalculator.java      |  4 ---
 .../examples/wikisearch/query/Query.java        |  2 +-
 10 files changed, 34 insertions(+), 57 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0266eaed/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
index a06c57f..fc328cc 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
@@ -112,7 +112,7 @@ public class WikipediaMapper extends Mapper<LongWritable,Text,Text,Mutation> {
    * 
    * @param article
    * @param numPartitions
-   * @return
+   * @return The number of the partition for a given article.
    * @throws IllegalFormatException
    */
   public static int getPartitionId(Article article, int numPartitions) throws IllegalFormatException {

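The javadoc now states what getPartitionId returns; the method body is not part of this diff. Purely for illustration, a partitioner of this shape typically hashes a stable article property into the partition space (hypothetical sketch; the Article accessor here is assumed, not taken from the source):

    // hypothetical sketch only; not the real method body
    public static int getPartitionId(Article article, int numPartitions) {
      return (article.getTitle().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }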
http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0266eaed/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java
index 34b40d1..1623d55 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java
@@ -62,9 +62,9 @@ public class TextUtil {
   /**
    * Appends the UTF-8 bytes of the given string to the given {@link Text}
    * 
-   * @param text
+   * @param t
    *          the Text to which to append
-   * @param string
+   * @param s
    *          the String to append
    */
   public static void textAppendNoNull(Text t, String s) {

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0266eaed/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AbstractEvaluatingIterator.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AbstractEvaluatingIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AbstractEvaluatingIterator.java
index b3f7213..87b4da2 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AbstractEvaluatingIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AbstractEvaluatingIterator.java
@@ -114,10 +114,10 @@ public abstract class AbstractEvaluatingIterator implements SortedKeyValueIterat
   public abstract void fillMap(EventFields event, Key key, Value value) throws Exception;
   
   /**
-   * Check to see if this key should be acted upon. Provides the ability to skip this key and all of the following ones that match using the comparator.
+   * Provides the ability to skip this key and all of the following ones that match using the comparator.
    * 
    * @param key
-   * @return
+   * @return true if the key should be acted upon, otherwise false.
    * @throws IOException
    */
   public abstract boolean isKeyAccepted(Key key) throws IOException;

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0266eaed/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
index b675e9b..74fbc0c 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
@@ -96,7 +96,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
    * Returns the given key's dataLocation
    * 
    * @param key
-   * @return
+   * @return The given key's dataLocation
    */
   protected Text getDataLocation(Key key) {
     return key.getColumnFamily();
@@ -106,7 +106,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
    * Returns the given key's term
    * 
    * @param key
-   * @return
+   * @return The given key's term
    */
   protected Text getTerm(Key key) {
     int idx = 0;
@@ -120,7 +120,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
    * Returns the given key's DocID
    * 
    * @param key
-   * @return
+   * @return The given key's DocID
    */
   protected Text getDocID(Key key) {
     int idx = 0;
@@ -134,7 +134,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
    * Returns the given key's UID
    * 
    * @param key
-   * @return
+   * @return The given key's UID
    */
   protected String getUID(Key key) {
     int idx = 0;
@@ -151,7 +151,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
    *          The desired row
    * @param dataLocation
    *          The desired dataLocation
-   * @return
+   * @return A Key object built from the given row and dataLocation.
    */
   protected Key buildKey(Text row, Text dataLocation) {
     return new Key(row, (dataLocation == null) ? nullText : dataLocation);
@@ -166,7 +166,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
    *          The desired dataLocation
    * @param term
    *          The desired term
-   * @return
+   * @return A Key object built from the given row, dataLocation, and term.
    */
   protected Key buildKey(Text row, Text dataLocation, Text term) {
     return new Key(row, (dataLocation == null) ? nullText : dataLocation, (term == null) ? nullText : term);
@@ -177,7 +177,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
    * 
    * @param key
    *          The key who will be directly before the returned key
-   * @return
+   * @return The key directly following the given key.
    */
   protected Key buildFollowingPartitionKey(Key key) {
     return key.followingKey(PartialKey.ROW);
@@ -589,7 +589,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
    * 
    * @param columns
    *          The columns to be encoded
-   * @return
+   * @return A Base64 encoded string (using a \n delimiter) of all columns to intersect on.
    */
   public static String encodeColumns(Text[] columns) {
     StringBuilder sb = new StringBuilder();
@@ -605,7 +605,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
    * 
    * @param terms
    *          The terms to be encoded
-   * @return
+   * @return A Base64 encoded string (using a \n delimiter) of all terms to intersect on.
    */
   public static String encodeTermValues(Text[] terms) {
     StringBuilder sb = new StringBuilder();
@@ -622,7 +622,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
    * 
    * @param flags
    *          The array of NOTs
-   * @return
+   * @return A base64 encoded string of which columns are NOT'ed
    */
   public static String encodeBooleans(boolean[] flags) {
     byte[] bytes = new byte[flags.length];
@@ -641,7 +641,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
    * 
    * @param columns
    *          The Base64 encoded String of the columns
-   * @return
+   * @return A Text array of the decoded columns
    */
   public static Text[] decodeColumns(String columns) {
     String[] columnStrings = columns.split("\n");
@@ -658,7 +658,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
    * 
    * @param terms
    *          The Base64 encoded String of the terms
-   * @return
+   * @return A Text array of decoded terms.
    */
   public static Text[] decodeTermValues(String terms) {
     String[] termStrings = terms.split("\n");
@@ -674,7 +674,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
    * Decode the encoded NOT flags into a <code>boolean</code> array
    * 
    * @param flags
-   * @return
+   * @return A boolean array of decoded NOT flags
    */
   public static boolean[] decodeBooleans(String flags) {
     // return null of there were no flags

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0266eaed/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/OrIterator.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/OrIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/OrIterator.java
index d632c82..a217701 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/OrIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/OrIterator.java
@@ -150,7 +150,7 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
    * Returns the given key's row
    * 
    * @param key
-   * @return
+   * @return The given key's row
    */
   protected Text getPartition(Key key) {
     return key.getRow();
@@ -160,7 +160,7 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
    * Returns the given key's dataLocation
    * 
    * @param key
-   * @return
+   * @return The given key's dataLocation
    */
   protected Text getDataLocation(Key key) {
     return key.getColumnFamily();
@@ -170,7 +170,7 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
    * Returns the given key's term
    * 
    * @param key
-   * @return
+   * @return The given key's term
    */
   protected Text getTerm(Key key) {
     int idx = 0;
@@ -184,7 +184,7 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
    * Returns the given key's DocID
    * 
    * @param key
-   * @return
+   * @return The given key's DocID
    */
   protected Text getDocID(Key key) {
     int idx = 0;
@@ -198,7 +198,7 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
    * Returns the given key's UID
    * 
    * @param key
-   * @return
+   * @return The given key's UID
    */
   static protected String getUID(Key key) {
     try {
@@ -260,7 +260,7 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
    * Construct the topKey given the current <code>TermSource</code>
    * 
    * @param TS
-   * @return
+   * @return The top Key for a given TermSource
    */
   protected Key buildTopKey(TermSource TS) {
     if ((TS == null) || (TS.topKey == null)) {

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0266eaed/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/AbstractQueryLogic.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/AbstractQueryLogic.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/AbstractQueryLogic.java
index 0f2d472..cb90e92 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/AbstractQueryLogic.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/AbstractQueryLogic.java
@@ -222,8 +222,6 @@ public abstract class AbstractQueryLogic {
    * @param c
    * @param auths
    * @param queryLiterals
-   * @param begin
-   * @param end
    * @param datatypes
    *          - optional list of types
    * @return map of indexed field names to types to normalizers used in this date range
@@ -286,8 +284,6 @@ public abstract class AbstractQueryLogic {
    * @param c
    * @param auths
    * @param value
-   * @param begin
-   * @param end
    * @param datatypes
    *          - optional list of types
    * @return ranges that fit into the date range.
@@ -305,11 +301,6 @@ public abstract class AbstractQueryLogic {
    *          multimap of indexed field name and Normalizers used
    * @param terms
    *          multimap of field name and QueryTerm object
-   * @param begin
-   *          query begin date
-   * @param end
-   *          query end date
-   * @param dateFormatter
    * @param indexTableName
    * @param reverseIndexTableName
    * @param queryString

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0266eaed/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/FieldIndexQueryReWriter.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/FieldIndexQueryReWriter.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/FieldIndexQueryReWriter.java
index fc81df2..acfb4f4 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/FieldIndexQueryReWriter.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/FieldIndexQueryReWriter.java
@@ -90,7 +90,7 @@ public class FieldIndexQueryReWriter {
    * 
    * @param query
    * @param options
-   * @return
+   * @return String representation of a given query.
    * @throws ParseException
    * @throws Exception
    */
@@ -115,7 +115,7 @@ public class FieldIndexQueryReWriter {
    * 
    * @param query
    * @param options
-   * @return
+   * @return String representation of a given query.
    * @throws ParseException
    * @throws Exception
    */
@@ -140,7 +140,7 @@ public class FieldIndexQueryReWriter {
    * @param query
    * @param fNameUpper
    * @param fValueUpper
-   * @return
+   * @return String representation of a given query.
    * @throws ParseException
    */
   public String applyCaseSensitivity(String query, boolean fNameUpper, boolean fValueUpper) throws ParseException {
@@ -748,9 +748,7 @@ public class FieldIndexQueryReWriter {
   }
   
   /**
-   * 
    * @param options
-   * @return
    */
   public Multimap<String,String> parseIndexedTerms(Map<String,String> options) {
     if (options.get(INDEXED_TERMS_LIST) != null) {
@@ -782,9 +780,7 @@ public class FieldIndexQueryReWriter {
   }
   
   /**
-   * 
    * @param root
-   * @return
    */
   public RewriterTreeNode refactorTree(RewriterTreeNode root) {
     Enumeration<?> dfe = root.breadthFirstEnumeration();
@@ -983,8 +979,7 @@ public class FieldIndexQueryReWriter {
     }
     
     /**
-     * 
-     * @return
+     * @return The field name.
      */
     public String getFieldName() {
       return fieldName;
@@ -1000,7 +995,7 @@ public class FieldIndexQueryReWriter {
     
     /**
      * 
-     * @return
+     * @return The field value.
      */
     public String getFieldValue() {
       return fieldValue;
@@ -1016,7 +1011,7 @@ public class FieldIndexQueryReWriter {
     
     /**
      * 
-     * @return
+     * @return true if negated, otherwise false.
      */
     public boolean isNegated() {
       return negated;
@@ -1032,7 +1027,7 @@ public class FieldIndexQueryReWriter {
     
     /**
      * 
-     * @return
+     * @return The operator.
      */
     public String getOperator() {
       return operator;
@@ -1048,7 +1043,7 @@ public class FieldIndexQueryReWriter {
     
     /**
      * 
-     * @return
+     * @return The type.
      */
     public int getType() {
       return type;
@@ -1070,10 +1065,6 @@ public class FieldIndexQueryReWriter {
       this.removal = removal;
     }
     
-    /**
-     * 
-     * @return
-     */
     public String getContents() {
       StringBuilder s = new StringBuilder("[");
       s.append(toString());
@@ -1092,7 +1083,7 @@ public class FieldIndexQueryReWriter {
     
     /**
      * 
-     * @return
+     * @return A string representation of the field name and value.
      */
     public String printNode() {
       StringBuilder s = new StringBuilder("[");

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0266eaed/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/QueryEvaluator.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/QueryEvaluator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/QueryEvaluator.java
index feac1d3..aaac3d8 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/QueryEvaluator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/QueryEvaluator.java
@@ -201,7 +201,6 @@ public class QueryEvaluator {
    * Evaluates the query against an event.
    * 
    * @param eventFields
-   * @return
    */
   public boolean evaluate(EventFields eventFields) {
     

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0266eaed/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/RangeCalculator.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/RangeCalculator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/RangeCalculator.java
index ca3f9bb..d416f60 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/RangeCalculator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/RangeCalculator.java
@@ -279,9 +279,6 @@ public class RangeCalculator extends QueryParser {
    * @param auths
    * @param indexedTerms
    * @param terms
-   * @param begin
-   * @param end
-   * @param dateFormatter
    * @param query
    * @param logic
    * @param typeFilter
@@ -564,7 +561,6 @@ public class RangeCalculator extends QueryParser {
    *          mapKey for wildcard and range queries that specify which mapkey to use in the results
    * @param typeFilter
    *          - optional list of datatypes
-   * @return
    * @throws TableNotFoundException
    */
   protected Map<MapKey,TermRange> queryGlobalIndex(Map<MapKey,Set<Range>> indexRanges, String specificFieldName, String tableName, boolean isReverse,

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/0266eaed/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java
index 82ac8e5..bffe8ad 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java
@@ -204,7 +204,7 @@ public class Query implements IQuery {
    * 
    * @param query
    * @param auths
-   * @return
+   * @return The results of a query
    * @throws ParseException
    */
   public Results query(String query, String auths) {


[15/50] [abbrv] git commit: ACCUMULO-446 ACCUMULO-447 fix "or" conditions, partition ranges, jumps and document debug logging

Posted by uj...@apache.org.
ACCUMULO-446 ACCUMULO-447 fix "or" conditions, partition ranges, jumps and document debug logging

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1299791 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/e1dfeb6e
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/e1dfeb6e
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/e1dfeb6e

Branch: refs/heads/master
Commit: e1dfeb6ebbb147baad576dee055ef462a61a9fff
Parents: 0266eae
Author: Eric C. Newton <ec...@apache.org>
Authored: Mon Mar 12 19:06:21 2012 +0000
Committer: Eric C. Newton <ec...@apache.org>
Committed: Mon Mar 12 19:06:21 2012 +0000

----------------------------------------------------------------------
 README                                          |  8 +++
 .../wikisearch/iterator/AndIterator.java        | 41 +++++---------
 .../iterator/BooleanLogicIterator.java          | 58 +++-----------------
 .../wikisearch/iterator/FieldIndexIterator.java | 45 +++++++++------
 .../wikisearch/iterator/OrIterator.java         |  7 +--
 .../wikisearch/logic/AbstractQueryLogic.java    | 16 ++----
 .../examples/wikisearch/logic/QueryLogic.java   |  9 +--
 .../wikisearch/parser/RangeCalculator.java      | 32 +++++------
 .../examples/wikisearch/query/Query.java        |  4 --
 .../wikisearch/logic/TestQueryLogic.java        |  4 --
 10 files changed, 83 insertions(+), 141 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e1dfeb6e/README
----------------------------------------------------------------------
diff --git a/README b/README
index 4844fe6..daec8e4 100644
--- a/README
+++ b/README
@@ -63,3 +63,11 @@
 	There are two parameters to the REST service, query and auths. The query parameter is the same string that you would type
 	into the search box at ui.jsp, and the auths parameter is a comma-separated list of wikis that you want to search (i.e.
 	enwiki,frwiki,dewiki, etc. Or you can use all) 
+	
+	10. Optional. Add the following line to the $ACCUMULO_HOME/conf/log4j.properties file to turn off debug messages in the specialized 
+	iterators, which will dramatically increase performance:
+	
+	log4j.logger.org.apache.accumulo.examples.wikisearch.iterator=INFO,A1
+	
+	This needs to be propagated to all the tablet server nodes, and Accumulo needs to be restarted.
+	
\ No newline at end of file

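For reference, the logger line from step 10 sits in log4j.properties next to an appender named A1. The appender definition below is illustrative (Accumulo's shipped log4j.properties already defines A1); only the last line comes from the README:

    # illustrative appender definition; substitute the one already in
    # $ACCUMULO_HOME/conf/log4j.properties
    log4j.appender.A1=org.apache.log4j.ConsoleAppender
    log4j.appender.A1.layout=org.apache.log4j.PatternLayout
    log4j.appender.A1.layout.ConversionPattern=%d{ISO8601} [%c{2}] %-5p: %m%n
    # raise the wikisearch iterators to INFO to suppress per-key debug output
    log4j.logger.org.apache.accumulo.examples.wikisearch.iterator=INFO,A1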
http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e1dfeb6e/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
index 74fbc0c..5ace7c8 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
@@ -759,18 +759,19 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
       log.debug("In AndIterator.seek()");
       log.debug("AndIterator.seek Given range => " + range);
     }
-    // if (firstSeek) {
+    currentRow = new Text();
+    currentDocID.set(emptyByteArray);
+    doSeek(range, seekColumnFamilies, inclusive);
+  }
+  
+  private void doSeek(Range range, Collection<ByteSequence> seekColumnFamilies, boolean inclusive) throws IOException {
+
     overallRange = new Range(range);
-    // firstSeek = false;
-    // }
+
     if (range.getEndKey() != null && range.getEndKey().getRow() != null) {
       this.parentEndRow = range.getEndKey().getRow();
     }
     
-    // overallRange = new Range(range);
-    currentRow = new Text();
-    currentDocID.set(emptyByteArray);
-    
     this.seekColumnFamilies = seekColumnFamilies;
     this.inclusive = inclusive;
     
@@ -801,7 +802,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
       if (overallRange != null && !overallRange.contains(topKey)) {
         topKey = null;
         if (log.isDebugEnabled()) {
-          log.debug("seek, topKey is outside of overall range: " + overallRange);
+          log.debug("doSeek, topKey is outside of overall range: " + overallRange);
         }
       }
     }
@@ -853,16 +854,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
       if (log.isDebugEnabled()) {
         log.debug("jump called, but topKey is null, must need to move to next row");
       }
-      // call seek with the jumpKey
-      
-      Key endKey = null;
-      if (parentEndRow != null) {
-        endKey = new Key(parentEndRow);
-      }
-      Range newRange = new Range(jumpKey, true, endKey, false);
-      this.seek(newRange, seekColumnFamilies, false);
-      // the parent seek should account for the endKey range check.
-      return hasTop();
+      return false;
     } else {
       
       int comp = this.topKey.getRow().compareTo(jumpKey.getRow());
@@ -909,16 +901,13 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
           if (log.isDebugEnabled()) {
             log.debug("jump, uid jump");
           }
-          // move one, and then advanceToIntersection will move the rest.
           Text row = jumpKey.getRow();
-          String cq = topKey.getColumnQualifier().toString();
-          cq = cq.replaceAll(myUid, jumpUid);
-          
-          Key startKey = buildKey(row, topKey.getColumnFamily(), new Text(cq));
-          Range range = new Range(startKey, true, null, false);
-          sources[0].iter.seek(range, seekColumnFamilies, true);
-          advanceToIntersection();
+          Range range = new Range(row);
+          this.currentRow = row;
+          this.currentDocID = new Text(this.getUID(jumpKey));
           
+          doSeek(range, seekColumnFamilies, false);
+
           // make sure it is in the range if we have one.
           if (hasTop() && parentEndRow != null && topKey.getRow().compareTo(parentEndRow) > 0) {
             topKey = null;

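The AndIterator change above splits cursor reset from range positioning: seek() clears currentRow/currentDocID and then delegates to doSeek(), while jump() seeds that state from the jump key and calls doSeek() directly. The removed code instead recursed from jump() back into seek(), which wiped the very cursor state the jump was trying to establish. A condensed sketch of the resulting shape (wikisearch details elided):

    // Condensed illustration of the seek/doSeek split; not the full iterator.
    public void seek(Range range, Collection<ByteSequence> cfs, boolean inclusive)
        throws IOException {
      currentRow = new Text();             // full reset: the caller picked the range
      currentDocID.set(emptyByteArray);
      doSeek(range, cfs, inclusive);
    }

    private void doSeek(Range range, Collection<ByteSequence> cfs, boolean inclusive)
        throws IOException {
      overallRange = new Range(range);     // position sources, no cursor reset
      if (range.getEndKey() != null && range.getEndKey().getRow() != null)
        parentEndRow = range.getEndKey().getRow();
      // ... seek the underlying sources and advance to the intersection ...
    }

    public boolean jump(Key jumpKey) throws IOException {
      // seed the cursor from the jump target, then reuse doSeek(); the
      // seeded state survives because doSeek() does not reset it
      currentRow = jumpKey.getRow();
      currentDocID = new Text(getUID(jumpKey));
      doSeek(new Range(currentRow), seekColumnFamilies, false);
      return hasTop();
    }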
http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e1dfeb6e/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/BooleanLogicIterator.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/BooleanLogicIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/BooleanLogicIterator.java
index e2d8d89..09ad8d3 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/BooleanLogicIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/BooleanLogicIterator.java
@@ -33,7 +33,6 @@ import java.util.Set;
 
 import org.apache.accumulo.core.data.ByteSequence;
 import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.PartialKey;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.iterators.IteratorEnvironment;
@@ -41,9 +40,9 @@ import org.apache.accumulo.core.iterators.OptionDescriber;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.examples.wikisearch.parser.JexlOperatorConstants;
 import org.apache.accumulo.examples.wikisearch.parser.QueryParser;
-import org.apache.accumulo.examples.wikisearch.parser.TreeNode;
 import org.apache.accumulo.examples.wikisearch.parser.QueryParser.QueryTerm;
 import org.apache.accumulo.examples.wikisearch.parser.RangeCalculator.RangeBounds;
+import org.apache.accumulo.examples.wikisearch.parser.TreeNode;
 import org.apache.accumulo.examples.wikisearch.util.FieldIndexKeyParser;
 import org.apache.commons.jexl2.parser.ASTAndNode;
 import org.apache.commons.jexl2.parser.ASTEQNode;
@@ -63,7 +62,6 @@ import org.apache.hadoop.io.Text;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 
-
 import com.google.common.collect.Multimap;
 
 public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>, OptionDescriber {
@@ -593,7 +591,6 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
       // 3 cases for child: SEL, AND, OR
       // and negation
       BooleanLogicTreeNode child = (BooleanLogicTreeNode) children.nextElement();
-      // if (child.getType() == BooleanLogicTreeNode.NodeType.SEL || child.getType() == BooleanLogicTreeNode.NodeType.AND) {
       if (child.getType() == ParserTreeConstants.JJTEQNODE || child.getType() == ParserTreeConstants.JJTNENODE
           || child.getType() == ParserTreeConstants.JJTANDNODE || child.getType() == ParserTreeConstants.JJTERNODE
           || child.getType() == ParserTreeConstants.JJTNRNODE || child.getType() == ParserTreeConstants.JJTLENODE
@@ -1504,48 +1501,14 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
     if (log.isDebugEnabled()) {
       log.debug("jump, All leaves need to advance to: " + jumpKey);
     }
-    
-    Key sKeyRow = new Key(jumpKey.getRow());
-    Key eKeyRow = new Key(jumpKey.followingKey(PartialKey.ROW));
-    Range rowRange = new Range(sKeyRow, true, eKeyRow, false);
-    
-    if (log.isDebugEnabled()) {
-      log.debug("jump, RowRange: " + rowRange);
-    }
-    
+
     String advanceUid = getIndexKeyUid(jumpKey);
     if (log.isDebugEnabled()) {
       log.debug("advanceUid =>  " + advanceUid);
     }
     boolean ok = true;
     for (BooleanLogicTreeNode leaf : positives) {
-      if (leaf.hasTop() && leaf.getTopKey().getRow().toString().compareTo(jumpKey.getRow().toString()) < 0) {
-        // seek
-        if (log.isDebugEnabled()) {
-          log.debug("row Jump on leaf: " + leaf);
-        }
-        ok = leaf.jump(jumpKey);
-        // leaf.seek(rowRange, EMPTY_COL_FAMS, true);
-        
-      } else if (leaf.hasTop() && leaf.getTopKey().getRow().toString().compareTo(jumpKey.getRow().toString()) == 0) {
-        // compare the uid's
-        if (log.isDebugEnabled()) {
-          log.debug("leaf topKey: " + leaf.getTopKey());
-          log.debug("advanceUid: " + advanceUid + "  leafUid: " + getEventKeyUid(leaf.getTopKey()));
-        }
-        
-        if (getEventKeyUid(leaf.getTopKey()).compareTo(advanceUid) < 0) {
-          if (log.isDebugEnabled()) {
-            log.debug("uid Jump on leaf: " + leaf);
-          }
-          ok = leaf.jump(jumpKey);
-        }
-      } else {
-        if (log.isDebugEnabled()) {
-          log.debug("this leaf no jump: " + leaf);
-        }
-        continue;
-      }
+      leaf.jump(jumpKey);
     }
     return ok;
   }
@@ -1842,23 +1805,20 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
     resetNegatives();
     
     // test Tree, if it's not valid, call next
-    if (testTreeState()) {
+    if (testTreeState() && overallRange.contains(root.getTopKey())) {
       if (!negatives.isEmpty()) {
         // now advance negatives
         advanceNegatives(this.root.getTopKey());
         if (!testTreeState()) {
-          if (overallRange.contains(root.getTopKey())) {
-            next();
-          } else {
-            setTopKey(null);
-            return;
-          }
+          next();
         }
       }
       
-      log.debug("overallRange " + overallRange + " topKey " + this.root.getTopKey() + " contains " + overallRange.contains(this.root.getTopKey()));
+      if (log.isDebugEnabled()) {
+        log.debug("overallRange " + overallRange + " topKey " + this.root.getTopKey() + " contains " + overallRange.contains(this.root.getTopKey()));
+      }
 
-      if (overallRange.contains(this.root.getTopKey())) {
+      if (overallRange.contains(this.root.getTopKey()) && this.root.isValid()) {
         setTopKey(this.root.getTopKey());
       } else {
         setTopKey(null);

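Net effect of these BooleanLogicIterator hunks: the per-leaf row and uid comparisons move out of jump() — every positive leaf now decides internally how far to advance — and a top key is only published when it is both inside overallRange and reported valid by the root node. (Note that after this simplification, ok is only ever assigned true, so jump()'s return value no longer reflects the individual leaf results.)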
http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e1dfeb6e/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/FieldIndexIterator.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/FieldIndexIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/FieldIndexIterator.java
index d3d285f..ad39ab3 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/FieldIndexIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/FieldIndexIterator.java
@@ -22,15 +22,6 @@ import java.util.Collection;
 import java.util.HashMap;
 import java.util.Map;
 
-import org.apache.commons.jexl2.Expression;
-import org.apache.commons.jexl2.JexlContext;
-import org.apache.commons.jexl2.JexlEngine;
-import org.apache.commons.jexl2.MapContext;
-import org.apache.commons.jexl2.parser.ParserTreeConstants;
-import org.apache.hadoop.io.Text;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-
 import org.apache.accumulo.core.data.ByteSequence;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
@@ -40,6 +31,14 @@ import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.iterators.WrappingIterator;
 import org.apache.accumulo.examples.wikisearch.function.QueryFunctions;
 import org.apache.accumulo.examples.wikisearch.util.FieldIndexKeyParser;
+import org.apache.commons.jexl2.Expression;
+import org.apache.commons.jexl2.JexlContext;
+import org.apache.commons.jexl2.JexlEngine;
+import org.apache.commons.jexl2.MapContext;
+import org.apache.commons.jexl2.parser.ParserTreeConstants;
+import org.apache.hadoop.io.Text;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
 
 /**
  * This iterator should only return keys from the fi\0{fieldName}:{fieldValue} part of the shard table. Expect topKey to be CF, {datatype}\0{UID}
@@ -474,13 +473,15 @@ public class FieldIndexIterator extends WrappingIterator {
       }
       
     } else if (comp < 0) { // a row behind jump key, need to move forward
-      String myRow = "";
-      if (hasTop()) {
-        myRow = topKey.getRow().toString();
-      } else if (currentRow != null) {
-        myRow = currentRow.toString();
+      if (log.isDebugEnabled()) {
+        String myRow = "";
+        if (hasTop()) {
+          myRow = topKey.getRow().toString();
+        } else if (currentRow != null) {
+          myRow = currentRow.toString();
+        }
+        log.debug("My row " + myRow + " is less than jump row: " + jumpKey.getRow() + " seeking");
       }
-      log.debug("My row " + myRow + " is less than jump row: " + jumpKey.getRow() + " seeking");
       range = buildRange(jumpKey.getRow());
       // this.seek(range, EMPTY_COL_FAMS, false);
       
@@ -521,8 +522,20 @@ public class FieldIndexIterator extends WrappingIterator {
       }
       if (ucomp < 0) { // need to move up
         log.debug("my uid is less than jumpUid, topUid: " + myUid + "   jumpUid: " + jumpUid);
+        
+        Text cq = jumpKey.getColumnQualifier();
+        int index = cq.find(NULL_BYTE);
+        if (0 <= index) {
+          cq.set(cq.getBytes(), index + 1, cq.getLength() - index - 1);
+        } else {
+          log.error("Expected a NULL separator in the column qualifier");
+          this.topKey = null;
+          this.topValue = null;
+          return false;
+        }
+
         // note my internal range stays the same, I just need to move forward
-        Key startKey = new Key(topKey.getRow(), fName, new Text(fValue + NULL_BYTE + jumpKey.getColumnQualifier()));
+        Key startKey = new Key(topKey.getRow(), fName, new Text(fValue + NULL_BYTE + cq));
         Key endKey = new Key(topKey.getRow(), fName, new Text(fValue + ONE_BYTE));
         range = new Range(startKey, true, endKey, false);
         log.debug("Using range: " + range + " to seek");

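The uid-jump fix above splices only the {UID} portion of the jump key's column qualifier (laid out as {dataType}\0{UID}) onto fValue, where the old code appended the whole qualifier, datatype included. A small self-contained sketch of that Text manipulation; the sample values are assumptions for illustration:

import org.apache.hadoop.io.Text;

public class UidExtractionSketch {
  private static final String NULL_BYTE = "\0";

  // Strip everything up to and including the first null byte, leaving the UID.
  static Text uidOf(Text columnQualifier) {
    Text cq = new Text(columnQualifier); // copy, so the caller's Text is untouched
    int index = cq.find(NULL_BYTE);
    if (index < 0) {
      throw new IllegalArgumentException("expected a null separator in " + columnQualifier);
    }
    cq.set(cq.getBytes(), index + 1, cq.getLength() - index - 1);
    return cq;
  }

  public static void main(String[] args) {
    Text cq = new Text("enwiki" + NULL_BYTE + "-8282mc1zdi1s6");
    System.out.println(uidOf(cq)); // prints: -8282mc1zdi1s6
  }
}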
http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e1dfeb6e/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/OrIterator.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/OrIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/OrIterator.java
index a217701..78c8576 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/OrIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/OrIterator.java
@@ -342,10 +342,6 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
       log.debug("seek, overallRange: " + overallRange);
     }
     
-    // if (range.getStartKey() != null && range.getStartKey().getRow() != null) {
-    // this.parentStartRow = range.getStartKey().getRow();
-    // }
-    
     if (range.getEndKey() != null && range.getEndKey().getRow() != null) {
       this.parentEndRow = range.getEndKey().getRow();
     }
@@ -688,11 +684,12 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
         if (log.isDebugEnabled()) {
           log.debug("jump called, but ts.topKey is null, this one needs to move to next row.");
         }
+        Key startKey = new Key(jumpKey.getRow(), ts.dataLocation, new Text(ts.term + "\0" + jumpKey.getColumnFamily()));
         Key endKey = null;
         if (parentEndRow != null) {
           endKey = new Key(parentEndRow);
         }
-        Range newRange = new Range(jumpKey, true, endKey, false);
+        Range newRange = new Range(startKey, true, endKey, false);
         ts.iter.seek(newRange, columnFamilies, inclusive);
         ts.setNew();
         advanceToMatch(ts);

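The OrIterator fix covers the same jump scenario: when a term source has exhausted its current row, the new start key is rebuilt in the source's own key layout — row, dataLocation, term\0docid, with the doc id component taken from the jump key's column family — rather than seeking from the raw jump key, whose column family and qualifier belong to a different key structure and would position the source incorrectly.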
http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e1dfeb6e/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/AbstractQueryLogic.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/AbstractQueryLogic.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/AbstractQueryLogic.java
index cb90e92..5c7c20c 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/AbstractQueryLogic.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/AbstractQueryLogic.java
@@ -47,12 +47,12 @@ import org.apache.accumulo.examples.wikisearch.iterator.ReadAheadIterator;
 import org.apache.accumulo.examples.wikisearch.normalizer.LcNoDiacriticsNormalizer;
 import org.apache.accumulo.examples.wikisearch.normalizer.Normalizer;
 import org.apache.accumulo.examples.wikisearch.parser.EventFields;
+import org.apache.accumulo.examples.wikisearch.parser.EventFields.FieldValue;
 import org.apache.accumulo.examples.wikisearch.parser.FieldIndexQueryReWriter;
 import org.apache.accumulo.examples.wikisearch.parser.JexlOperatorConstants;
 import org.apache.accumulo.examples.wikisearch.parser.QueryParser;
-import org.apache.accumulo.examples.wikisearch.parser.RangeCalculator;
-import org.apache.accumulo.examples.wikisearch.parser.EventFields.FieldValue;
 import org.apache.accumulo.examples.wikisearch.parser.QueryParser.QueryTerm;
+import org.apache.accumulo.examples.wikisearch.parser.RangeCalculator;
 import org.apache.accumulo.examples.wikisearch.sample.Document;
 import org.apache.accumulo.examples.wikisearch.sample.Field;
 import org.apache.accumulo.examples.wikisearch.sample.Results;
@@ -207,7 +207,6 @@ public abstract class AbstractQueryLogic {
   private Kryo kryo = new Kryo();
   private EventFields eventFields = new EventFields();
   private List<String> unevaluatedFields = null;
-  private int numPartitions = 0;
   private Map<Class<? extends Normalizer>,Normalizer> normalizerCacheMap = new HashMap<Class<? extends Normalizer>,Normalizer>();
   private static final String NULL_BYTE = "\u0000";
   
@@ -395,20 +394,13 @@ public abstract class AbstractQueryLogic {
       this.unevaluatedFields.add(field);
   }
   
-  public int getNumPartitions() {
-    return numPartitions;
-  }
-  
-  public void setNumPartitions(int numPartitions) {
-    this.numPartitions = numPartitions;
-  }
-  
   public Document createDocument(Key key, Value value) {
+    Document doc = new Document();
+
     eventFields.clear();
     ByteBuffer buf = ByteBuffer.wrap(value.get());
     eventFields.readObjectData(kryo, buf);
     
-    Document doc = new Document();
     // Set the id to the document id which is located in the colf
     String row = key.getRow().toString();
     String colf = key.getColumnFamily().toString();

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e1dfeb6e/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/QueryLogic.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/QueryLogic.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/QueryLogic.java
index 7d4adc0..bcfeb70 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/QueryLogic.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/QueryLogic.java
@@ -33,8 +33,8 @@ import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.examples.wikisearch.iterator.EvaluatingIterator;
 import org.apache.accumulo.examples.wikisearch.normalizer.LcNoDiacriticsNormalizer;
 import org.apache.accumulo.examples.wikisearch.normalizer.Normalizer;
-import org.apache.accumulo.examples.wikisearch.parser.RangeCalculator;
 import org.apache.accumulo.examples.wikisearch.parser.QueryParser.QueryTerm;
+import org.apache.accumulo.examples.wikisearch.parser.RangeCalculator;
 import org.apache.accumulo.examples.wikisearch.protobuf.Uid;
 import org.apache.accumulo.examples.wikisearch.util.TextUtil;
 import org.apache.hadoop.io.Text;
@@ -90,8 +90,6 @@ public class QueryLogic extends AbstractQueryLogic {
   
   protected static Logger log = Logger.getLogger(QueryLogic.class);
   
-  private static String startPartition = "0";
-  
   public QueryLogic() {
     super();
   }
@@ -106,10 +104,7 @@ public class QueryLogic extends AbstractQueryLogic {
   }
   
   protected Collection<Range> getFullScanRange(Date begin, Date end, Multimap<String,QueryTerm> terms) {
-    String startKey = startPartition;
-    String endKey = Integer.toString(this.getNumPartitions());
-    Range r = new Range(startKey, true, endKey, false);
-    return Collections.singletonList(r);
+    return Collections.singletonList(new Range());
   }
   
   @Override

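With the numeric-partition bookkeeping removed, the full scan no longer needs to know how many partitions exist: a no-argument Range already spans the whole table. As a sketch:

import java.util.Collection;
import java.util.Collections;

import org.apache.accumulo.core.data.Range;

public class FullScanRangeSketch {
  // new Range() is unbounded on both ends over the row space, so one element
  // covers every partition the shard table happens to have.
  static Collection<Range> fullScanRange() {
    return Collections.singletonList(new Range());
  }
}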
http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e1dfeb6e/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/RangeCalculator.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/RangeCalculator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/RangeCalculator.java
index d416f60..8a5474b 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/RangeCalculator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/RangeCalculator.java
@@ -28,6 +28,7 @@ import org.apache.accumulo.core.client.BatchScanner;
 import org.apache.accumulo.core.client.Connector;
 import org.apache.accumulo.core.client.TableNotFoundException;
 import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.PartialKey;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.security.Authorizations;
@@ -249,7 +250,6 @@ public class RangeCalculator extends QueryParser {
   protected static Logger log = Logger.getLogger(RangeCalculator.class);
   private static String WILDCARD = ".*";
   private static String SINGLE_WILDCARD = "\\.";
-  protected static String START_ROW = "0";
   
   protected Connector c;
   protected Authorizations auths;
@@ -258,7 +258,6 @@ public class RangeCalculator extends QueryParser {
   protected String indexTableName;
   protected String reverseIndexTableName;
   protected int queryThreads = 8;
-  protected String END_ROW = null;
   
   /* final results of index lookups, ranges for the shard table */
   protected Set<Range> result = null;
@@ -294,7 +293,6 @@ public class RangeCalculator extends QueryParser {
     this.indexTableName = logic.getIndexTableName();
     this.reverseIndexTableName = logic.getReverseIndexTableName();
     this.queryThreads = logic.getQueryThreads();
-    this.END_ROW = Integer.toString(logic.getNumPartitions());
     
     Map<MapKey,Set<Range>> indexRanges = new HashMap<MapKey,Set<Range>>();
     Map<MapKey,Set<Range>> trailingWildcardRanges = new HashMap<MapKey,Set<Range>>();
@@ -340,9 +338,8 @@ public class RangeCalculator extends QueryParser {
           
           // EQUALS
           if (entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTEQNode.class))) {
-            Key startRange = new Key(fieldValue, fieldName, new Text(START_ROW));
-            Key endRange = new Key(fieldValue, fieldName, new Text(END_ROW));
-            Range r = new Range(startRange, true, endRange, false);
+            Key startRange = new Key(fieldValue, fieldName);
+            Range r = new Range(startRange, true, startRange.followingKey(PartialKey.ROW), true);
             
             MapKey key = new MapKey(fieldName.toString(), fieldValue.toString());
             key.setOriginalQueryValue(value);
@@ -360,9 +357,8 @@ public class RangeCalculator extends QueryParser {
               loc = normalizedFieldValue.indexOf(SINGLE_WILDCARD);
             if (-1 == loc) {
               // Then no wildcard in the query? Treat like the equals case above.
-              Key startRange = new Key(fieldValue, fieldName, new Text(START_ROW));
-              Key endRange = new Key(fieldValue, fieldName, new Text(END_ROW));
-              Range r = new Range(startRange, true, endRange, false);
+              Key startRange = new Key(fieldValue, fieldName);
+              Range r = new Range(startRange, true, startRange.followingKey(PartialKey.ROW), true);
               
               MapKey key = new MapKey(fieldName.toString(), fieldValue.toString());
               key.setOriginalQueryValue(value);
@@ -375,9 +371,9 @@ public class RangeCalculator extends QueryParser {
                 // Then we have a leading wildcard, reverse the term and use the global reverse index.
                 StringBuilder buf = new StringBuilder(normalizedFieldValue.substring(2));
                 normalizedFieldValue = buf.reverse().toString();
-                Key startRange = new Key(new Text(normalizedFieldValue + "\u0000"), fieldName, new Text(START_ROW));
-                Key endRange = new Key(new Text(normalizedFieldValue + "\u10FFFF"), fieldName, new Text(END_ROW));
-                Range r = new Range(startRange, true, endRange, false);
+                Key startRange = new Key(new Text(normalizedFieldValue + "\u0000"), fieldName);
+                Key endRange = new Key(new Text(normalizedFieldValue + "\u10FFFF"), fieldName);
+                Range r = new Range(startRange, true, endRange, true);
                 
                 MapKey key = new MapKey(fieldName.toString(), normalizedFieldValue);
                 key.setOriginalQueryValue(value);
@@ -388,9 +384,9 @@ public class RangeCalculator extends QueryParser {
               } else if (loc == (normalizedFieldValue.length() - 2)) {
                 normalizedFieldValue = normalizedFieldValue.substring(0, loc);
                 // Then we have a trailing wildcard character.
-                Key startRange = new Key(new Text(normalizedFieldValue + "\u0000"), fieldName, new Text(START_ROW));
-                Key endRange = new Key(new Text(normalizedFieldValue + "\u10FFFF"), fieldName, new Text(END_ROW));
-                Range r = new Range(startRange, true, endRange, false);
+                Key startRange = new Key(new Text(normalizedFieldValue + "\u0000"), fieldName);
+                Key endRange = new Key(new Text(normalizedFieldValue + "\u10FFFF"), fieldName);
+                Range r = new Range(startRange, true, endRange, true);
                 
                 MapKey key = new MapKey(fieldName.toString(), normalizedFieldValue);
                 key.setOriginalQueryValue(value);
@@ -438,9 +434,9 @@ public class RangeCalculator extends QueryParser {
           lower = up.getRow();
           upper = lk.getRow();
         }
-        Key startRange = new Key(lower, entry.getKey(), new Text(START_ROW));
-        Key endRange = new Key(upper, entry.getKey(), new Text(END_ROW));
-        Range r = new Range(startRange, true, endRange, false);
+        Key startRange = new Key(lower, entry.getKey());
+        Key endRange = new Key(upper, entry.getKey());
+        Range r = new Range(startRange, true, endRange, true);
         // For the range queries we need to query the global index and then handle the results a little differently.
         Map<MapKey,Set<Range>> ranges = new HashMap<MapKey,Set<Range>>();
         MapKey key = new MapKey(entry.getKey().toString(), entry.getValue().getLower().toString());

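The RangeCalculator changes follow the same theme: instead of bracketing each global-index row between the synthetic partition strings START_ROW and END_ROW in the column qualifier, an equality term now covers its entire row by ending at followingKey(PartialKey.ROW). A compilable sketch of that range shape, with the field name and value assumed for illustration:

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.PartialKey;
import org.apache.accumulo.core.data.Range;
import org.apache.hadoop.io.Text;

public class IndexRangeSketch {
  public static void main(String[] args) {
    Text fieldValue = new Text("massive"); // global index row = normalized term
    Text fieldName = new Text("TEXT");     // global index colf = field name

    // followingKey(PartialKey.ROW) is the first possible key of the next row,
    // so the range runs from this field's entries to the end of the row and
    // picks up every shard-id column qualifier without naming partitions.
    Key start = new Key(fieldValue, fieldName);
    Range r = new Range(start, true, start.followingKey(PartialKey.ROW), true);
    System.out.println(r);
  }
}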
http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e1dfeb6e/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java
index bffe8ad..d7dab3a 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java
@@ -70,9 +70,6 @@ public class Query implements IQuery {
   @Resource(name = "tableName")
   private String tableName;
   
-  @Resource(name = "partitions")
-  private int partitions;
-  
   @Resource(name = "threads")
   private int threads;
   
@@ -235,7 +232,6 @@ public class Query implements IQuery {
     table.setReverseIndexTableName(tableName + "ReverseIndex");
     table.setQueryThreads(threads);
     table.setUnevaluatedFields("TEXT");
-    table.setNumPartitions(partitions);
     table.setUseReadAheadIterator(false);
     return table.runQuery(connector, authorizations, query, null, null, null);
   }

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e1dfeb6e/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
----------------------------------------------------------------------
diff --git a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java b/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
index 7276360..8400fb5 100644
--- a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
+++ b/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
@@ -37,7 +37,6 @@ import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
-import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat;
 import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
 import org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper;
 import org.apache.accumulo.examples.wikisearch.parser.RangeCalculator;
@@ -50,7 +49,6 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.RawLocalFileSystem;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.OutputCommitter;
 import org.apache.hadoop.mapreduce.RecordWriter;
@@ -162,8 +160,6 @@ public class TestQueryLogic {
     table.setIndexTableName(INDEX_TABLE_NAME);
     table.setReverseIndexTableName(RINDEX_TABLE_NAME);
     table.setUseReadAheadIterator(false);
-    table.setNumPartitions(1);
-    
   }
   
   void debugQuery(String tableName) throws Exception {


[20/50] [abbrv] git commit: ACCUMULO-481: update revision, remove "incubating"

Posted by uj...@apache.org.
ACCUMULO-481: update revision, remove "incubating"

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1303527 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/eb86eae3
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/eb86eae3
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/eb86eae3

Branch: refs/heads/master
Commit: eb86eae3d6a1843793e2d08ee5a3fef2400ac0f8
Parents: e7e7928
Author: Eric C. Newton <ec...@apache.org>
Authored: Wed Mar 21 19:44:40 2012 +0000
Committer: Eric C. Newton <ec...@apache.org>
Committed: Wed Mar 21 19:44:40 2012 +0000

----------------------------------------------------------------------
 ingest/bin/ingest.sh          | 2 +-
 ingest/bin/ingest_parallel.sh | 2 +-
 ingest/pom.xml                | 2 +-
 pom.xml                       | 2 +-
 query-war/pom.xml             | 2 +-
 query/pom.xml                 | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/eb86eae3/ingest/bin/ingest.sh
----------------------------------------------------------------------
diff --git a/ingest/bin/ingest.sh b/ingest/bin/ingest.sh
index f743412..39afff0 100755
--- a/ingest/bin/ingest.sh
+++ b/ingest/bin/ingest.sh
@@ -38,7 +38,7 @@ LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 #
 # Map/Reduce job
 #
-JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.0-incubating-SNAPSHOT.jar
+JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.1-SNAPSHOT.jar
 CONF=$SCRIPT_DIR/../conf/wikipedia.xml
 HDFS_DATA_DIR=$1
 export HADOOP_CLASSPATH=$CLASSPATH

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/eb86eae3/ingest/bin/ingest_parallel.sh
----------------------------------------------------------------------
diff --git a/ingest/bin/ingest_parallel.sh b/ingest/bin/ingest_parallel.sh
index 74dce9c..2407cef 100755
--- a/ingest/bin/ingest_parallel.sh
+++ b/ingest/bin/ingest_parallel.sh
@@ -38,7 +38,7 @@ LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 #
 # Map/Reduce job
 #
-JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.0-incubating-SNAPSHOT.jar
+JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.1-SNAPSHOT.jar
 CONF=$SCRIPT_DIR/../conf/wikipedia.xml
 HDFS_DATA_DIR=$1
 export HADOOP_CLASSPATH=$CLASSPATH

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/eb86eae3/ingest/pom.xml
----------------------------------------------------------------------
diff --git a/ingest/pom.xml b/ingest/pom.xml
index 743a4e8..3a4e4fd 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.0-incubating-SNAPSHOT</version>
+    <version>1.4.1-SNAPSHOT</version>
     <relativePath>../</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/eb86eae3/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 91f97b5..5859604 100644
--- a/pom.xml
+++ b/pom.xml
@@ -20,7 +20,7 @@
   <parent>
         <artifactId>accumulo-examples</artifactId>
         <groupId>org.apache.accumulo</groupId>
-        <version>1.4.0-incubating-SNAPSHOT</version>
+        <version>1.4.1-SNAPSHOT</version>
         <relativePath>../</relativePath>
   </parent>
   <artifactId>accumulo-wikisearch</artifactId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/eb86eae3/query-war/pom.xml
----------------------------------------------------------------------
diff --git a/query-war/pom.xml b/query-war/pom.xml
index 862f2aa..50ff054 100644
--- a/query-war/pom.xml
+++ b/query-war/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.0-incubating-SNAPSHOT</version>
+    <version>1.4.1-SNAPSHOT</version>
   </parent>
 
   <artifactId>wikisearch-query-war</artifactId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/eb86eae3/query/pom.xml
----------------------------------------------------------------------
diff --git a/query/pom.xml b/query/pom.xml
index 9ec1ad6..1c51744 100644
--- a/query/pom.xml
+++ b/query/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.0-incubating-SNAPSHOT</version>
+    <version>1.4.1-SNAPSHOT</version>
     <relativePath>../</relativePath>
   </parent>
 


[39/50] [abbrv] git commit: ACCUMULO-876 added maven profiles to assist in compiling against different versions of hadoop

Posted by uj...@apache.org.
ACCUMULO-876 added maven profiles to assist in compiling against different versions of hadoop

git-svn-id: https://svn.apache.org/repos/asf/accumulo/trunk@1419924 13f79535-47bb-0310-9956-ffa450edef68
(cherry picked from commit 65c271c2a46b87eb0e4991d7fd7b6fbd7456c0a8)

Reason: Hadoop2 Compat
Author: Billie Rinaldi <bi...@apache.org>
Ref: ACCUMULO-1792

This patch differs from upstream by fixing poms that are not part of the Accumulo 1.5 release and by keeping the 1.4-specific build instructions in the README.

Author: Jonathan Hsieh <jo...@cloudera.com>
Author: Sean Busbey <bu...@cloudera.com>

Signed-off-by: Eric Newton <er...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/f0b42c77
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/f0b42c77
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/f0b42c77

Branch: refs/heads/master
Commit: f0b42c779f8c8fdedbc8b5eee04bbe69679d7f60
Parents: 5cd987d
Author: Billie Rinaldi <bi...@apache.org>
Authored: Tue Dec 11 00:01:39 2012 +0000
Committer: Eric Newton <er...@gmail.com>
Committed: Mon Nov 25 16:06:42 2013 -0500

----------------------------------------------------------------------
 ingest/pom.xml | 42 +++++++++++++++++++++++++++++++++++++-----
 query/pom.xml  | 43 ++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 75 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/f0b42c77/ingest/pom.xml
----------------------------------------------------------------------
diff --git a/ingest/pom.xml b/ingest/pom.xml
index 29b2047..31d7110 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -29,11 +29,7 @@
 
   <dependencies>
     <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-core</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
+      <groupId>org.apache.zookeeper</groupId>
       <artifactId>zookeeper</artifactId>
     </dependency>
     <dependency>
@@ -125,4 +121,40 @@
     </plugins>
   </build>
 
+  <profiles>
+    <!-- profile for building against Hadoop 1.0.x
+    Activate by not specifying hadoop.profile -->
+    <profile>
+      <id>hadoop-1.0</id>
+      <activation>
+        <property>
+          <name>!hadoop.profile</name>
+        </property>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-core</artifactId>
+        </dependency>
+      </dependencies>
+    </profile>
+    <!-- profile for building against Hadoop 2.0.x
+    Activate using: mvn -Dhadoop.profile=2.0 -->
+    <profile>
+      <id>hadoop-2.0</id>
+      <activation>
+        <property>
+          <name>hadoop.profile</name>
+          <value>2.0</value>
+        </property>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client</artifactId>
+        </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
 </project>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/f0b42c77/query/pom.xml
----------------------------------------------------------------------
diff --git a/query/pom.xml b/query/pom.xml
index 05b1e46..8c6e6d3 100644
--- a/query/pom.xml
+++ b/query/pom.xml
@@ -38,11 +38,7 @@
       <artifactId>accumulo-core</artifactId>
     </dependency>
     <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-core</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
+      <groupId>org.apache.zookeeper</groupId>
       <artifactId>zookeeper</artifactId>
       <scope>runtime</scope>
     </dependency>
@@ -144,4 +140,41 @@
     </plugins>
   </build>
   <modelVersion>4.0.0</modelVersion>
+
+  <profiles>
+    <!-- profile for building against Hadoop 1.0.x
+    Activate by not specifying hadoop.profile -->
+    <profile>
+      <id>hadoop-1.0</id>
+      <activation>
+        <property>
+          <name>!hadoop.profile</name>
+        </property>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-core</artifactId>
+        </dependency>
+      </dependencies>
+    </profile>
+    <!-- profile for building against Hadoop 2.0.x
+    Activate using: mvn -Dhadoop.profile=2.0 -->
+    <profile>
+      <id>hadoop-2.0</id>
+      <activation>
+        <property>
+          <name>hadoop.profile</name>
+          <value>2.0</value>
+        </property>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client</artifactId>
+        </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
 </project>

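As the inline comments note, the hadoop-1.0 profile is the default (active whenever hadoop.profile is unset) and the Hadoop 2.0 build is selected with mvn -Dhadoop.profile=2.0; the only difference between the two profiles is whether hadoop-core (Hadoop 1) or hadoop-client (Hadoop 2) supplies the Hadoop dependencies.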

[28/50] [abbrv] git commit: ACCUMULO-1168 moving 1.4 branch to 1.4.4-SNAPSHOT

Posted by uj...@apache.org.
ACCUMULO-1168 moving 1.4 branch to 1.4.4-SNAPSHOT

git-svn-id: https://svn.apache.org/repos/asf/accumulo/branches/1.4@1455158 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/06c52491
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/06c52491
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/06c52491

Branch: refs/heads/master
Commit: 06c52491eebc4438b19fa6f6853b5ecc6d100f7d
Parents: bb80d51
Author: Eric C. Newton <ec...@apache.org>
Authored: Mon Mar 11 14:19:51 2013 +0000
Committer: Eric C. Newton <ec...@apache.org>
Committed: Mon Mar 11 14:19:51 2013 +0000

----------------------------------------------------------------------
 ingest/pom.xml    | 2 +-
 pom.xml           | 2 +-
 query-war/pom.xml | 2 +-
 query/pom.xml     | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/06c52491/ingest/pom.xml
----------------------------------------------------------------------
diff --git a/ingest/pom.xml b/ingest/pom.xml
index e5124eb..f2a8f77 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.3-SNAPSHOT</version>
+    <version>1.4.4-SNAPSHOT</version>
     <relativePath>../</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/06c52491/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index eedf7b6..1036436 100644
--- a/pom.xml
+++ b/pom.xml
@@ -20,7 +20,7 @@
   <parent>
         <artifactId>accumulo-examples</artifactId>
         <groupId>org.apache.accumulo</groupId>
-        <version>1.4.3-SNAPSHOT</version>
+        <version>1.4.4-SNAPSHOT</version>
         <relativePath>../</relativePath>
   </parent>
   <artifactId>accumulo-wikisearch</artifactId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/06c52491/query-war/pom.xml
----------------------------------------------------------------------
diff --git a/query-war/pom.xml b/query-war/pom.xml
index 47133b4..555f995 100644
--- a/query-war/pom.xml
+++ b/query-war/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.3-SNAPSHOT</version>
+    <version>1.4.4-SNAPSHOT</version>
   </parent>
 
   <artifactId>wikisearch-query-war</artifactId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/06c52491/query/pom.xml
----------------------------------------------------------------------
diff --git a/query/pom.xml b/query/pom.xml
index d80d28c..9d20269 100644
--- a/query/pom.xml
+++ b/query/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.3-SNAPSHOT</version>
+    <version>1.4.4-SNAPSHOT</version>
     <relativePath>../</relativePath>
   </parent>
 


[24/50] [abbrv] git commit: ACCUMULO-665 pass the appropriate column families down to the underlying iterators so they stay out of document locality group

Posted by uj...@apache.org.
ACCUMULO-665 pass the appropriate column families down to the underlying iterators so they stay out of document locality group

git-svn-id: https://svn.apache.org/repos/asf/accumulo/branches/1.4@1358234 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/a286a879
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/a286a879
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/a286a879

Branch: refs/heads/master
Commit: a286a8793e484a8b64b2d578341e939891522069
Parents: a3ca5a7
Author: Eric C. Newton <ec...@apache.org>
Authored: Fri Jul 6 14:27:38 2012 +0000
Committer: Eric C. Newton <ec...@apache.org>
Committed: Fri Jul 6 14:27:38 2012 +0000

----------------------------------------------------------------------
 .../apache/accumulo/examples/wikisearch/iterator/AndIterator.java | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/a286a879/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
----------------------------------------------------------------------
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
index eafc4dd..734d423 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
@@ -47,7 +47,6 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
   private Text currentRow = null;
   private Text currentTerm = new Text(emptyByteArray);
   private Text currentDocID = new Text(emptyByteArray);
-  private Collection<ByteSequence> seekColumnFamilies;
   private Text parentEndRow;
   private static boolean SEEK_INCLUSIVE = true;
   
@@ -870,7 +869,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
         }
         Key sKey = new Key(jumpKey.getRow());
         Range fake = new Range(sKey, true, endKey, false);
-        this.seek(fake, this.seekColumnFamilies, false);
+        this.seek(fake, null, false);
         return hasTop();
       } else {
         // need to check uid

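For context on this one-line change: SortedKeyValueIterator.seek() takes the range, a collection of column families, and an inclusive flag, and the families passed down determine which locality groups the tablet server must read — hence the ticket's concern with keeping seeks out of the document locality group. A minimal sketch of a seek restricted to a single family; the family name here is an assumption for illustration:

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;

import org.apache.accumulo.core.data.ArrayByteSequence;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;

public class SeekWithFamiliesSketch {
  // Seek the source over the whole range, but only within the fi\0TEXT column
  // family; other locality groups (such as the document group) are skipped.
  static void seekFieldIndex(SortedKeyValueIterator<Key,Value> source) throws IOException {
    Collection<ByteSequence> fams =
        Collections.<ByteSequence>singleton(new ArrayByteSequence("fi\0TEXT"));
    source.seek(new Range(), fams, true); // true = restrict to these families
  }
}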

[29/50] [abbrv] git commit: ACCUMULO-1168 preparing 1.4.3 tag

Posted by uj...@apache.org.
ACCUMULO-1168 preparing 1.4.3 tag

git-svn-id: https://svn.apache.org/repos/asf/accumulo/branches/1.4.3@1455159 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/99862355
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/99862355
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/99862355

Branch: refs/heads/master
Commit: 99862355fcda39330fc5c0b9853f8e22a8c399af
Parents: bb80d51
Author: Eric C. Newton <ec...@apache.org>
Authored: Mon Mar 11 14:21:27 2013 +0000
Committer: Eric C. Newton <ec...@apache.org>
Committed: Mon Mar 11 14:21:27 2013 +0000

----------------------------------------------------------------------
 ingest/bin/ingest.sh          | 2 +-
 ingest/bin/ingest_parallel.sh | 2 +-
 ingest/pom.xml                | 2 +-
 pom.xml                       | 2 +-
 query-war/pom.xml             | 2 +-
 query/pom.xml                 | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/99862355/ingest/bin/ingest.sh
----------------------------------------------------------------------
diff --git a/ingest/bin/ingest.sh b/ingest/bin/ingest.sh
index 8dca09c..54ed26f 100755
--- a/ingest/bin/ingest.sh
+++ b/ingest/bin/ingest.sh
@@ -38,7 +38,7 @@ LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 #
 # Map/Reduce job
 #
-JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.3-SNAPSHOT.jar
+JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.3.jar
 CONF=$SCRIPT_DIR/../conf/wikipedia.xml
 HDFS_DATA_DIR=$1
 export HADOOP_CLASSPATH=$CLASSPATH

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/99862355/ingest/bin/ingest_parallel.sh
----------------------------------------------------------------------
diff --git a/ingest/bin/ingest_parallel.sh b/ingest/bin/ingest_parallel.sh
index 2f2bb5b..0a31926 100755
--- a/ingest/bin/ingest_parallel.sh
+++ b/ingest/bin/ingest_parallel.sh
@@ -38,7 +38,7 @@ LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 #
 # Map/Reduce job
 #
-JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.3-SNAPSHOT.jar
+JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.3.jar
 CONF=$SCRIPT_DIR/../conf/wikipedia.xml
 HDFS_DATA_DIR=$1
 export HADOOP_CLASSPATH=$CLASSPATH

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/99862355/ingest/pom.xml
----------------------------------------------------------------------
diff --git a/ingest/pom.xml b/ingest/pom.xml
index e5124eb..c63e040 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.3-SNAPSHOT</version>
+    <version>1.4.3</version>
     <relativePath>../</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/99862355/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index eedf7b6..b61ced7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -20,7 +20,7 @@
   <parent>
         <artifactId>accumulo-examples</artifactId>
         <groupId>org.apache.accumulo</groupId>
-        <version>1.4.3-SNAPSHOT</version>
+        <version>1.4.3</version>
         <relativePath>../</relativePath>
   </parent>
   <artifactId>accumulo-wikisearch</artifactId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/99862355/query-war/pom.xml
----------------------------------------------------------------------
diff --git a/query-war/pom.xml b/query-war/pom.xml
index 47133b4..2500edc 100644
--- a/query-war/pom.xml
+++ b/query-war/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.3-SNAPSHOT</version>
+    <version>1.4.3</version>
   </parent>
 
   <artifactId>wikisearch-query-war</artifactId>

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/99862355/query/pom.xml
----------------------------------------------------------------------
diff --git a/query/pom.xml b/query/pom.xml
index d80d28c..44ae21e 100644
--- a/query/pom.xml
+++ b/query/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>accumulo-wikisearch</artifactId>
     <groupId>org.apache.accumulo</groupId>
-    <version>1.4.3-SNAPSHOT</version>
+    <version>1.4.3</version>
     <relativePath>../</relativePath>
   </parent>
 


[11/50] [abbrv] git commit: ACCUMULO-412 fix index search

Posted by uj...@apache.org.
ACCUMULO-412 fix index search

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1245631 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/27fa06e3
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/27fa06e3
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/27fa06e3

Branch: refs/heads/master
Commit: 27fa06e339eb1fcef4cabfa71576a017d7b109dc
Parents: 1e05129
Author: Eric C. Newton <ec...@apache.org>
Authored: Fri Feb 17 16:03:47 2012 +0000
Committer: Eric C. Newton <ec...@apache.org>
Committed: Fri Feb 17 16:03:47 2012 +0000

----------------------------------------------------------------------
 .../ingest/WikipediaPartitionedMapper.java      |  3 +--
 .../wikisearch/ingest/WikipediaPartitioner.java | 21 +-------------------
 .../output/SortingRFileOutputFormat.java        |  8 +++-----
 3 files changed, 5 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/27fa06e3/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java
index 5e82a7d..bb4ae64 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java
@@ -42,14 +42,13 @@ import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.log4j.Logger;
 
 import com.google.common.collect.HashMultimap;
 import com.google.common.collect.Multimap;
 
 public class WikipediaPartitionedMapper extends Mapper<Text,Article,Text,Mutation> {
   
-  private static final Logger log = Logger.getLogger(WikipediaPartitionedMapper.class);
+  // private static final Logger log = Logger.getLogger(WikipediaPartitionedMapper.class);
   
   public final static Charset UTF8 = Charset.forName("UTF-8");
   public static final String DOCUMENT_COLUMN_FAMILY = "d";

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/27fa06e3/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitioner.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitioner.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitioner.java
index 82af9fd..3507108 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitioner.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitioner.java
@@ -23,40 +23,21 @@ package org.apache.accumulo.examples.wikisearch.ingest;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
-import java.io.StringReader;
 import java.nio.charset.Charset;
-import java.util.HashSet;
-import java.util.IllegalFormatException;
-import java.util.Map.Entry;
-import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.ColumnVisibility;
 import org.apache.accumulo.examples.wikisearch.ingest.ArticleExtractor.Article;
 import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
-import org.apache.accumulo.examples.wikisearch.normalizer.LcNoDiacriticsNormalizer;
-import org.apache.accumulo.examples.wikisearch.protobuf.Uid;
-import org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.Builder;
-import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.log4j.Logger;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.lucene.wikipedia.analysis.WikipediaTokenizer;
-
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.Multimap;
 
 public class WikipediaPartitioner extends Mapper<LongWritable,Text,Text,Article> {
   
-  private static final Logger log = Logger.getLogger(WikipediaPartitioner.class);
+  // private static final Logger log = Logger.getLogger(WikipediaPartitioner.class);
   
   public final static Charset UTF8 = Charset.forName("UTF-8");
   public static final String DOCUMENT_COLUMN_FAMILY = "d";

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/27fa06e3/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java
index d8c57c2..2738e2c 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java
@@ -4,20 +4,18 @@ import java.io.IOException;
 
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.OutputCommitter;
 import org.apache.hadoop.mapreduce.OutputFormat;
 import org.apache.hadoop.mapreduce.RecordWriter;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.io.Text;
-import org.apache.log4j.Logger;
 
 public class SortingRFileOutputFormat extends OutputFormat<Text,Mutation> {
 
-  private static final Logger log = Logger.getLogger(SortingRFileOutputFormat.class);
+  // private static final Logger log = Logger.getLogger(SortingRFileOutputFormat.class);
 
   public static final String PATH_NAME = "sortingrfileoutputformat.path";
   public static final String MAX_BUFFER_SIZE = "sortingrfileoutputformat.max.buffer.size";


[47/50] [abbrv] git commit: ACCUMULO-2020 Wikisearch should manage dependency conflicts.

Posted by uj...@apache.org.
ACCUMULO-2020 Wikisearch should manage dependency conflicts.

 Update enforcer plugin to require dependency convergence, fix conflicts.


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/73e321e0
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/73e321e0
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/73e321e0

Branch: refs/heads/master
Commit: 73e321e0e9dfb9cd5efe00cefd0c6f1eb9fa3242
Parents: 49f553a
Author: Sean Busbey <bu...@clouderagovt.com>
Authored: Wed Dec 11 00:28:46 2013 -0600
Committer: Sean Busbey <bu...@cloudera.com>
Committed: Mon Mar 10 11:04:46 2014 -0500

----------------------------------------------------------------------
 pom.xml | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/73e321e0/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 69b85db..6be16f0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -63,6 +63,11 @@
         <executions>
           <execution>
             <id>enforce-mvn</id>
+            <configuration>
+              <rules>
+                <DependencyConvergence/>
+              </rules>
+            </configuration>
             <goals>
               <goal>enforce</goal>
             </goals>
@@ -226,6 +231,12 @@
         <groupId>org.apache.lucene</groupId>
         <artifactId>lucene-wikipedia</artifactId>
         <version>${version.lucene-wikipedia}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>commons-digester</groupId>
+            <artifactId>commons-digester</artifactId>
+          </exclusion>
+        </exclusions>
       </dependency>
       <dependency>
         <groupId>com.google.protobuf</groupId>
@@ -246,6 +257,24 @@
       	<groupId>com.sun.jersey</groupId>
       	<artifactId>jersey-server</artifactId>
       	<version>1.11</version>
+        <exclusions>
+          <exclusion>
+            <groupId>asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <!-- XXX This is just to fix the dependency conflict in Hadoop 1 -->
+      <dependency>
+        <groupId>net.java.dev.jets3t</groupId>
+        <artifactId>jets3t</artifactId>
+        <version>0.7.1</version>
+        <exclusions>
+          <exclusion>
+            <groupId>commons-httpclient</groupId>
+            <artifactId>commons-httpclient</artifactId>
+          </exclusion>
+        </exclusions>
       </dependency>
     </dependencies>
   </dependencyManagement>

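For reference, the DependencyConvergence rule fails the build whenever two dependency paths resolve the same artifact to different versions; the exclusions added here (commons-digester, asm, commons-httpclient) collapse those conflicting paths onto a single version so the rule passes.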

[44/50] [abbrv] git commit: ACCUMULO-2018 Wikisearch Parallel Ingest script should use parallel configuration file.

Posted by uj...@apache.org.
ACCUMULO-2018 Wikisearch Parallel Ingest script should use parallel configuration file.

 Make sure the parallel ingester uses the parallel configuration file, per README.parallel


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/bcf16abe
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/bcf16abe
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/bcf16abe

Branch: refs/heads/master
Commit: bcf16abe896e6ebb93a0e935473ec7a2afcc8ec2
Parents: e84d8d7
Author: Sean Busbey <bu...@clouderagovt.com>
Authored: Wed Dec 11 02:18:37 2013 -0600
Committer: Sean Busbey <bu...@cloudera.com>
Committed: Mon Mar 10 10:09:02 2014 -0500

----------------------------------------------------------------------
 ingest/bin/ingest_parallel.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/bcf16abe/ingest/bin/ingest_parallel.sh
----------------------------------------------------------------------
diff --git a/ingest/bin/ingest_parallel.sh b/ingest/bin/ingest_parallel.sh
index 8c63ac0..62e79db 100755
--- a/ingest/bin/ingest_parallel.sh
+++ b/ingest/bin/ingest_parallel.sh
@@ -39,7 +39,7 @@ LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 # Map/Reduce job
 #
 JAR=$SCRIPT_DIR/../lib/wikisearch-ingest-1.4.5-SNAPSHOT.jar
-CONF=$SCRIPT_DIR/../conf/wikipedia.xml
+CONF=$SCRIPT_DIR/../conf/wikipedia_parallel.xml
 HDFS_DATA_DIR=$1
 export HADOOP_CLASSPATH=$CLASSPATH
 echo "hadoop jar $JAR org.apache.accumulo.examples.wikisearch.ingest.WikipediaPartitionedIngester -libjars $LIBJARS -conf $CONF -Dwikipedia.input=${HDFS_DATA_DIR}"


[27/50] [abbrv] git commit: ACCUMULO-912 - fixing tests for fixed Mock

Posted by uj...@apache.org.
ACCUMULO-912 - fixing tests for fixed Mock



git-svn-id: https://svn.apache.org/repos/asf/accumulo/branches/1.4@1423547 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/bb80d51f
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/bb80d51f
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/bb80d51f

Branch: refs/heads/master
Commit: bb80d51fa057bdbe1853110edddf6ec465249365
Parents: ab20ab7
Author: John Vines <vi...@apache.org>
Authored: Tue Dec 18 17:38:46 2012 +0000
Committer: John Vines <vi...@apache.org>
Committed: Tue Dec 18 17:38:46 2012 +0000

----------------------------------------------------------------------
 .../apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/bb80d51f/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
----------------------------------------------------------------------
diff --git a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java b/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
index 8400fb5..4b7aaee 100644
--- a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
+++ b/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
@@ -116,7 +116,7 @@ public class TestQueryLogic {
     conf.set(WikipediaConfiguration.NUM_GROUPS, "1");
     
     MockInstance i = new MockInstance();
-    c = i.getConnector("root", "pass");
+    c = i.getConnector("root", "");
     for (String table : TABLE_NAMES) {
       try {
         c.tableOperations().delete(table);