You are viewing a plain text version of this content. The canonical link to the original was not preserved in this plain-text copy.
Posted to commits@accumulo.apache.org by mm...@apache.org on 2019/06/07 18:11:28 UTC

[accumulo-wikisearch] branch master updated: Minor fixes

This is an automated email from the ASF dual-hosted git repository.

mmiller pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/accumulo-wikisearch.git


The following commit(s) were added to refs/heads/master by this push:
     new 10bb3d1  Minor fixes
10bb3d1 is described below

commit 10bb3d158eb6bed6e584ce79e1aa661369488d7e
Author: Mike Miller <mm...@apache.org>
AuthorDate: Fri Jun 7 14:11:03 2019 -0400

    Minor fixes
---
 ingest/pom.xml                                                    | 4 ++--
 .../accumulo/examples/wikisearch/ingest/WikipediaMapper.java      | 8 +-------
 pom.xml                                                           | 2 +-
 3 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/ingest/pom.xml b/ingest/pom.xml
index 1f6bc99..a8a0a6c 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -84,8 +84,8 @@
             <phase>prepare-package</phase>
             <configuration>
               <outputDirectory>lib</outputDirectory>
-              &lt;!&ndash; just grab the non-provided runtime dependencies &ndash;&gt;
-              &lt;!&ndash; XXX we include guava at the same version as hadoop 2 provides so that we have it on hadoop 1 &ndash;&gt;
+	      <!-- just grab the non-provided runtime dependencies -->
+	      <!-- XXX we include guava at the same version as hadoop 2 provides so that we have it on hadoop 1 -->
               <includeArtifactIds>commons-lang,guava,lucene-core,lucene-analyzers,lucene-wikipedia,protobuf-java,accumulo-core,hadoop-core,libthrift,zookeeper,commons-codec,accumulo-fate,accumulo-trace</includeArtifactIds>
               <excludeTransitive>false</excludeTransitive>
             </configuration>
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
index c2fed03..63f1d42 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
@@ -226,13 +226,7 @@ public class WikipediaMapper extends Mapper<LongWritable,Text,Text,Mutation> {
   static Set<String> getTokens(Article article) {
     Set<String> tokenList = new HashSet<>();
     try (WikipediaTokenizer tok = new WikipediaTokenizer(new StringReader(article.getText()))) {
-      Attribute term = tok.addAttribute(Attribute.class);
-      while (tok.incrementToken()) {
-        String token = term.toString();
-        if (!StringUtils.isEmpty(token)) {
-          tokenList.add(token);
-        }
-      }
+      tokenList.add(tok.toString());
     } catch (IOException e) {
       log.error("Error tokenizing text", e);
     }
diff --git a/pom.xml b/pom.xml
index ba62cf0..e997c9f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -375,7 +375,7 @@
             <phase>prepare-package</phase>
             <configuration>
               <outputDirectory>../../lib</outputDirectory>
-                &lt;!&ndash; just grab the non-provided runtime dependencies &ndash;&gt;
+                <!-- just grab the non-provided runtime dependencies -->
               <includeArtifactIds>commons-collections,commons-configuration,commons-io,commons-lang,jline,log4j,libthrift,commons-jci-core,commons-jci-fam,commons-logging,commons-logging-api</includeArtifactIds>
               <excludeGroupIds>accumulo</excludeGroupIds>
               <excludeTransitive>true</excludeTransitive>