Posted to commits@accumulo.apache.org by ec...@apache.org on 2013/11/26 16:18:10 UTC
[15/37] ACCUMULO-600 removed wikisearch from trunk
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/protobuf/Uid.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/protobuf/Uid.java b/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/protobuf/Uid.java
deleted file mode 100644
index c469748..0000000
--- a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/protobuf/Uid.java
+++ /dev/null
@@ -1,470 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-// Generated by the protocol buffer compiler. DO NOT EDIT!
-// source: Uid.proto
-
-package org.apache.accumulo.examples.wikisearch.protobuf;
-
-public final class Uid {
- private Uid() {}
-
- public static void registerAllExtensions(com.google.protobuf.ExtensionRegistry registry) {}
-
- public static final class List extends com.google.protobuf.GeneratedMessage {
- // Use List.newBuilder() to construct.
- private List() {
- initFields();
- }
-
- private List(boolean noInit) {}
-
- private static final List defaultInstance;
-
- public static List getDefaultInstance() {
- return defaultInstance;
- }
-
- public List getDefaultInstanceForType() {
- return defaultInstance;
- }
-
- public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() {
- return org.apache.accumulo.examples.wikisearch.protobuf.Uid.internal_static_protobuf_List_descriptor;
- }
-
- protected com.google.protobuf.GeneratedMessage.FieldAccessorTable internalGetFieldAccessorTable() {
- return org.apache.accumulo.examples.wikisearch.protobuf.Uid.internal_static_protobuf_List_fieldAccessorTable;
- }
-
- // required bool IGNORE = 1;
- public static final int IGNORE_FIELD_NUMBER = 1;
- private boolean hasIGNORE;
- private boolean iGNORE_ = false;
-
- public boolean hasIGNORE() {
- return hasIGNORE;
- }
-
- public boolean getIGNORE() {
- return iGNORE_;
- }
-
- // required uint64 COUNT = 2;
- public static final int COUNT_FIELD_NUMBER = 2;
- private boolean hasCOUNT;
- private long cOUNT_ = 0L;
-
- public boolean hasCOUNT() {
- return hasCOUNT;
- }
-
- public long getCOUNT() {
- return cOUNT_;
- }
-
- // repeated string UID = 3;
- public static final int UID_FIELD_NUMBER = 3;
- private java.util.List<java.lang.String> uID_ = java.util.Collections.emptyList();
-
- public java.util.List<java.lang.String> getUIDList() {
- return uID_;
- }
-
- public int getUIDCount() {
- return uID_.size();
- }
-
- public java.lang.String getUID(int index) {
- return uID_.get(index);
- }
-
- private void initFields() {}
-
- public final boolean isInitialized() {
- if (!hasIGNORE)
- return false;
- if (!hasCOUNT)
- return false;
- return true;
- }
-
- public void writeTo(com.google.protobuf.CodedOutputStream output) throws java.io.IOException {
- getSerializedSize();
- if (hasIGNORE()) {
- output.writeBool(1, getIGNORE());
- }
- if (hasCOUNT()) {
- output.writeUInt64(2, getCOUNT());
- }
- for (java.lang.String element : getUIDList()) {
- output.writeString(3, element);
- }
- getUnknownFields().writeTo(output);
- }
-
- private int memoizedSerializedSize = -1;
-
- public int getSerializedSize() {
- int size = memoizedSerializedSize;
- if (size != -1)
- return size;
-
- size = 0;
- if (hasIGNORE()) {
- size += com.google.protobuf.CodedOutputStream.computeBoolSize(1, getIGNORE());
- }
- if (hasCOUNT()) {
- size += com.google.protobuf.CodedOutputStream.computeUInt64Size(2, getCOUNT());
- }
- {
- int dataSize = 0;
- for (java.lang.String element : getUIDList()) {
- dataSize += com.google.protobuf.CodedOutputStream.computeStringSizeNoTag(element);
- }
- size += dataSize;
- size += 1 * getUIDList().size();
- }
- size += getUnknownFields().getSerializedSize();
- memoizedSerializedSize = size;
- return size;
- }
-
- public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(com.google.protobuf.ByteString data) throws com.google.protobuf.InvalidProtocolBufferException {
- return newBuilder().mergeFrom(data).buildParsed();
- }
-
- public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(com.google.protobuf.ByteString data, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
- throws com.google.protobuf.InvalidProtocolBufferException {
- return newBuilder().mergeFrom(data, extensionRegistry).buildParsed();
- }
-
- public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(byte[] data) throws com.google.protobuf.InvalidProtocolBufferException {
- return newBuilder().mergeFrom(data).buildParsed();
- }
-
- public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(byte[] data, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
- throws com.google.protobuf.InvalidProtocolBufferException {
- return newBuilder().mergeFrom(data, extensionRegistry).buildParsed();
- }
-
- public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(java.io.InputStream input) throws java.io.IOException {
- return newBuilder().mergeFrom(input).buildParsed();
- }
-
- public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
- throws java.io.IOException {
- return newBuilder().mergeFrom(input, extensionRegistry).buildParsed();
- }
-
- public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseDelimitedFrom(java.io.InputStream input) throws java.io.IOException {
- Builder builder = newBuilder();
- if (builder.mergeDelimitedFrom(input)) {
- return builder.buildParsed();
- } else {
- return null;
- }
- }
-
- public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseDelimitedFrom(java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
- throws java.io.IOException {
- Builder builder = newBuilder();
- if (builder.mergeDelimitedFrom(input, extensionRegistry)) {
- return builder.buildParsed();
- } else {
- return null;
- }
- }
-
- public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(com.google.protobuf.CodedInputStream input) throws java.io.IOException {
- return newBuilder().mergeFrom(input).buildParsed();
- }
-
- public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
- throws java.io.IOException {
- return newBuilder().mergeFrom(input, extensionRegistry).buildParsed();
- }
-
- public static Builder newBuilder() {
- return Builder.create();
- }
-
- public Builder newBuilderForType() {
- return newBuilder();
- }
-
- public static Builder newBuilder(org.apache.accumulo.examples.wikisearch.protobuf.Uid.List prototype) {
- return newBuilder().mergeFrom(prototype);
- }
-
- public Builder toBuilder() {
- return newBuilder(this);
- }
-
- public static final class Builder extends com.google.protobuf.GeneratedMessage.Builder<Builder> {
- private org.apache.accumulo.examples.wikisearch.protobuf.Uid.List result;
-
- // Construct using protobuf.Uid.List.newBuilder()
- private Builder() {}
-
- private static Builder create() {
- Builder builder = new Builder();
- builder.result = new org.apache.accumulo.examples.wikisearch.protobuf.Uid.List();
- return builder;
- }
-
- protected org.apache.accumulo.examples.wikisearch.protobuf.Uid.List internalGetResult() {
- return result;
- }
-
- public Builder clear() {
- if (result == null) {
- throw new IllegalStateException("Cannot call clear() after build().");
- }
- result = new org.apache.accumulo.examples.wikisearch.protobuf.Uid.List();
- return this;
- }
-
- public Builder clone() {
- return create().mergeFrom(result);
- }
-
- public com.google.protobuf.Descriptors.Descriptor getDescriptorForType() {
- return org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.getDescriptor();
- }
-
- public org.apache.accumulo.examples.wikisearch.protobuf.Uid.List getDefaultInstanceForType() {
- return org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.getDefaultInstance();
- }
-
- public boolean isInitialized() {
- return result.isInitialized();
- }
-
- public org.apache.accumulo.examples.wikisearch.protobuf.Uid.List build() {
- if (result != null && !isInitialized()) {
- throw newUninitializedMessageException(result);
- }
- return buildPartial();
- }
-
- private org.apache.accumulo.examples.wikisearch.protobuf.Uid.List buildParsed() throws com.google.protobuf.InvalidProtocolBufferException {
- if (!isInitialized()) {
- throw newUninitializedMessageException(result).asInvalidProtocolBufferException();
- }
- return buildPartial();
- }
-
- public org.apache.accumulo.examples.wikisearch.protobuf.Uid.List buildPartial() {
- if (result == null) {
- throw new IllegalStateException("build() has already been called on this Builder.");
- }
- if (result.uID_ != java.util.Collections.EMPTY_LIST) {
- result.uID_ = java.util.Collections.unmodifiableList(result.uID_);
- }
- org.apache.accumulo.examples.wikisearch.protobuf.Uid.List returnMe = result;
- result = null;
- return returnMe;
- }
-
- public Builder mergeFrom(com.google.protobuf.Message other) {
- if (other instanceof org.apache.accumulo.examples.wikisearch.protobuf.Uid.List) {
- return mergeFrom((org.apache.accumulo.examples.wikisearch.protobuf.Uid.List) other);
- } else {
- super.mergeFrom(other);
- return this;
- }
- }
-
- public Builder mergeFrom(org.apache.accumulo.examples.wikisearch.protobuf.Uid.List other) {
- if (other == org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.getDefaultInstance())
- return this;
- if (other.hasIGNORE()) {
- setIGNORE(other.getIGNORE());
- }
- if (other.hasCOUNT()) {
- setCOUNT(other.getCOUNT());
- }
- if (!other.uID_.isEmpty()) {
- if (result.uID_.isEmpty()) {
- result.uID_ = new java.util.ArrayList<java.lang.String>();
- }
- result.uID_.addAll(other.uID_);
- }
- this.mergeUnknownFields(other.getUnknownFields());
- return this;
- }
-
- public Builder mergeFrom(com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
- throws java.io.IOException {
- com.google.protobuf.UnknownFieldSet.Builder unknownFields = com.google.protobuf.UnknownFieldSet.newBuilder(this.getUnknownFields());
- while (true) {
- int tag = input.readTag();
- switch (tag) {
- case 0:
- this.setUnknownFields(unknownFields.build());
- return this;
- default: {
- if (!parseUnknownField(input, unknownFields, extensionRegistry, tag)) {
- this.setUnknownFields(unknownFields.build());
- return this;
- }
- break;
- }
- case 8: {
- setIGNORE(input.readBool());
- break;
- }
- case 16: {
- setCOUNT(input.readUInt64());
- break;
- }
- case 26: {
- addUID(input.readString());
- break;
- }
- }
- }
- }
-
- // required bool IGNORE = 1;
- public boolean hasIGNORE() {
- return result.hasIGNORE();
- }
-
- public boolean getIGNORE() {
- return result.getIGNORE();
- }
-
- public Builder setIGNORE(boolean value) {
- result.hasIGNORE = true;
- result.iGNORE_ = value;
- return this;
- }
-
- public Builder clearIGNORE() {
- result.hasIGNORE = false;
- result.iGNORE_ = false;
- return this;
- }
-
- // required uint64 COUNT = 2;
- public boolean hasCOUNT() {
- return result.hasCOUNT();
- }
-
- public long getCOUNT() {
- return result.getCOUNT();
- }
-
- public Builder setCOUNT(long value) {
- result.hasCOUNT = true;
- result.cOUNT_ = value;
- return this;
- }
-
- public Builder clearCOUNT() {
- result.hasCOUNT = false;
- result.cOUNT_ = 0L;
- return this;
- }
-
- // repeated string UID = 3;
- public java.util.List<java.lang.String> getUIDList() {
- return java.util.Collections.unmodifiableList(result.uID_);
- }
-
- public int getUIDCount() {
- return result.getUIDCount();
- }
-
- public java.lang.String getUID(int index) {
- return result.getUID(index);
- }
-
- public Builder setUID(int index, java.lang.String value) {
- if (value == null) {
- throw new NullPointerException();
- }
- result.uID_.set(index, value);
- return this;
- }
-
- public Builder addUID(java.lang.String value) {
- if (value == null) {
- throw new NullPointerException();
- }
- if (result.uID_.isEmpty()) {
- result.uID_ = new java.util.ArrayList<java.lang.String>();
- }
- result.uID_.add(value);
- return this;
- }
-
- public Builder addAllUID(java.lang.Iterable<? extends java.lang.String> values) {
- if (result.uID_.isEmpty()) {
- result.uID_ = new java.util.ArrayList<java.lang.String>();
- }
- super.addAll(values, result.uID_);
- return this;
- }
-
- public Builder clearUID() {
- result.uID_ = java.util.Collections.emptyList();
- return this;
- }
-
- // @@protoc_insertion_point(builder_scope:protobuf.List)
- }
-
- static {
- defaultInstance = new List(true);
- org.apache.accumulo.examples.wikisearch.protobuf.Uid.internalForceInit();
- defaultInstance.initFields();
- }
-
- // @@protoc_insertion_point(class_scope:protobuf.List)
- }
-
- private static com.google.protobuf.Descriptors.Descriptor internal_static_protobuf_List_descriptor;
- private static com.google.protobuf.GeneratedMessage.FieldAccessorTable internal_static_protobuf_List_fieldAccessorTable;
-
- public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() {
- return descriptor;
- }
-
- private static com.google.protobuf.Descriptors.FileDescriptor descriptor;
- static {
- java.lang.String[] descriptorData = {"\n\tUid.proto\022\010protobuf\"2\n\004List\022\016\n\006IGNORE\030"
- + "\001 \002(\010\022\r\n\005COUNT\030\002 \002(\004\022\013\n\003UID\030\003 \003(\tB\014\n\010pro" + "tobufH\001"};
- com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
- public com.google.protobuf.ExtensionRegistry assignDescriptors(com.google.protobuf.Descriptors.FileDescriptor root) {
- descriptor = root;
- internal_static_protobuf_List_descriptor = getDescriptor().getMessageTypes().get(0);
- internal_static_protobuf_List_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable(
- internal_static_protobuf_List_descriptor, new java.lang.String[] {"IGNORE", "COUNT", "UID",}, org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.class,
- org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.Builder.class);
- return null;
- }
- };
- com.google.protobuf.Descriptors.FileDescriptor.internalBuildGeneratedFileFrom(descriptorData, new com.google.protobuf.Descriptors.FileDescriptor[] {},
- assigner);
- }
-
- public static void internalForceInit() {}
-
- // @@protoc_insertion_point(outer_class_scope)
-}
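
For reference, the generated Uid.List API deleted above was exercised as in the following minimal sketch, assuming protobuf-java 2.x on the classpath (the builder methods, toByteArray(), and parseFrom() all appear in the file above and in GlobalIndexUidTest further down; the UID value is hypothetical):

    import org.apache.accumulo.examples.wikisearch.protobuf.Uid;

    public class UidListExample {
      public static void main(String[] args) throws Exception {
        // Build a Uid.List; IGNORE and COUNT are required fields.
        Uid.List list = Uid.List.newBuilder()
            .setIGNORE(false)
            .setCOUNT(1L)
            .addUID("doc-0001") // hypothetical UID value
            .build();

        // Round-trip through the wire format, as the wikisearch
        // iterators did with Accumulo Value payloads.
        byte[] bytes = list.toByteArray();
        Uid.List parsed = Uid.List.parseFrom(bytes);
        System.out.println(parsed.getCOUNT() + " " + parsed.getUIDList());
      }
    }
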
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java b/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java
deleted file mode 100644
index 09755c0..0000000
--- a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.reader;
-
-
-import java.io.IOException;
-
-import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
-import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
-import org.apache.accumulo.examples.wikisearch.util.TextUtil;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-
-/**
- * This class aggregates Text values based on a start token and an end token. An example use case is XML data. This will not work with data that has
- * nested start and stop tokens.
- *
- */
-public class AggregatingRecordReader extends LongLineRecordReader {
-
- public static final String START_TOKEN = "aggregating.token.start";
- public static final String END_TOKEN = "aggregating.token.end";
- public static final String RETURN_PARTIAL_MATCHES = "aggregating.allow.partial";
-
- private LongWritable key = new LongWritable();
- private String startToken = null;
- private String endToken = null;
- private long counter = 0;
- private Text aggValue = new Text();
- private boolean startFound = false;
- private StringBuilder remainder = new StringBuilder(0);
- private boolean returnPartialMatches = false;
-
- @Override
- public LongWritable getCurrentKey() {
- key.set(counter);
- return key;
- }
-
- @Override
- public Text getCurrentValue() {
- return aggValue;
- }
-
- @Override
- public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
- super.initialize(((WikipediaInputSplit)genericSplit).getFileSplit(), context);
- this.startToken = WikipediaConfiguration.isNull(context.getConfiguration(), START_TOKEN, String.class);
- this.endToken = WikipediaConfiguration.isNull(context.getConfiguration(), END_TOKEN, String.class);
- this.returnPartialMatches = context.getConfiguration().getBoolean(RETURN_PARTIAL_MATCHES, false);
-
- /*
- * Text-appending works almost exactly like the + operator on Strings: it creates a byte array exactly the size of [prefix + suffix] and dumps the bytes
- * into the new array. This reader works by doing lots of little additions, one line at a time. With most XML, the documents are partitioned on line
- * boundaries, so we will generally have lots of additions. Setting a large default byte array for a Text object can avoid this and give us
- * StringBuilder-like functionality for Text objects.
- */
- byte[] txtBuffer = new byte[2048];
- aggValue.set(txtBuffer);
- }
-
- @Override
- public boolean nextKeyValue() throws IOException {
- aggValue.clear();
- boolean hasNext = false;
- boolean finished = false;
- // Find the start token
- while (!finished && (((hasNext = super.nextKeyValue()) == true) || remainder.length() > 0)) {
- if (hasNext)
- finished = process(super.getCurrentValue());
- else
- finished = process(null);
- if (finished) {
- startFound = false;
- counter++;
- return true;
- }
- }
- // If we have anything loaded in the agg value (and we found a start)
- // then we ran out of data before finding the end. Just return the
- // data we have and if it's not valid, downstream parsing of the data
- // will fail.
- if (returnPartialMatches && startFound && aggValue.getLength() > 0) {
- startFound = false;
- counter++;
- return true;
- }
- return false;
- }
-
- /**
- * Populates aggValue with the contents of the Text object.
- *
- * @param t
- * the next line of input, or null to flush any buffered remainder
- * @return true if aggValue is complete, else false when more data is needed.
- */
- private boolean process(Text t) {
-
- if (null != t)
- remainder.append(t.toString());
- while (remainder.length() > 0) {
- if (!startFound) {
- // If found, then begin aggregating at the start offset
- int start = remainder.indexOf(startToken);
- if (-1 != start) {
- // Append the start token to the aggregate value
- TextUtil.textAppendNoNull(aggValue, remainder.substring(start, start + startToken.length()), false);
- // Remove to the end of the start token from the remainder
- remainder.delete(0, start + startToken.length());
- startFound = true;
- } else {
- // If we are looking for the start and have not found it, then remove
- // the bytes
- remainder.delete(0, remainder.length());
- }
- } else {
- // Try to find the end
- int end = remainder.indexOf(endToken);
- // Also try to find the start
- int start = remainder.indexOf(startToken);
- if (-1 == end) {
- if (returnPartialMatches && start >= 0) {
- // End token not found, but another start token was found...
- // The amount to copy is up to the beginning of the next start token
- TextUtil.textAppendNoNull(aggValue, remainder.substring(0, start), false);
- remainder.delete(0, start);
- return true;
- } else {
- // Not found, aggregate the entire remainder
- TextUtil.textAppendNoNull(aggValue, remainder.toString(), false);
- // Delete all chars from remainder
- remainder.delete(0, remainder.length());
- }
- } else {
- if (returnPartialMatches && start >= 0 && start < end) {
- // We found the end token, but found another start token first, so
- // deal with that.
- TextUtil.textAppendNoNull(aggValue, remainder.substring(0, start), false);
- remainder.delete(0, start);
- return true;
- } else {
- // END_TOKEN was found. Extract to the end of END_TOKEN
- TextUtil.textAppendNoNull(aggValue, remainder.substring(0, end + endToken.length()), false);
- // Remove from remainder up to the end of END_TOKEN
- remainder.delete(0, end + endToken.length());
- return true;
- }
- }
- }
- }
- return false;
- }
-
-}
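
The reader above is driven entirely by its three configuration keys. A minimal wiring sketch, assuming a Hadoop job Configuration and the <page>...</page> tokens that WikipediaMapperTest (further down) sets for the same keys:

    import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
    import org.apache.hadoop.conf.Configuration;

    public class AggregatingReaderConfigExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Aggregate everything between <page> and </page> into one record.
        conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
        conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
        // Return a trailing unterminated match instead of dropping it.
        conf.setBoolean(AggregatingRecordReader.RETURN_PARTIAL_MATCHES, true);
      }
    }
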
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java b/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java
deleted file mode 100644
index a4da0ad..0000000
--- a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.reader;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-
-/**
- * A class that provides a line reader from an input stream.
- */
-public class LfLineReader {
- private static final int DEFAULT_BUFFER_SIZE = 64 * 1024;
- private int bufferSize = DEFAULT_BUFFER_SIZE;
- private InputStream in;
- private byte[] buffer;
- // the number of bytes of real data in the buffer
- private int bufferLength = 0;
- // the current position in the buffer
- private int bufferPosn = 0;
-
- private static final byte LF = '\n';
-
- /**
- * Create a line reader that reads from the given stream using the default buffer-size (64k).
- *
- * @param in
- * The input stream
- */
- public LfLineReader(InputStream in) {
- this(in, DEFAULT_BUFFER_SIZE);
- }
-
- /**
- * Create a line reader that reads from the given stream using the given buffer-size.
- *
- * @param in
- * The input stream
- * @param bufferSize
- * Size of the read buffer
- */
- public LfLineReader(InputStream in, int bufferSize) {
- this.in = in;
- this.bufferSize = bufferSize;
- this.buffer = new byte[this.bufferSize];
- }
-
- /**
- * Create a line reader that reads from the given stream using the <code>io.file.buffer.size</code> specified in the given <code>Configuration</code>.
- *
- * @param in
- * input stream
- * @param conf
- * configuration
- * @throws IOException
- */
- public LfLineReader(InputStream in, Configuration conf) throws IOException {
- this(in, conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE));
- }
-
- /**
- * Close the underlying stream.
- *
- * @throws IOException
- */
- public void close() throws IOException {
- in.close();
- }
-
- /**
- * Read one line from the InputStream into the given Text. A line can be terminated by '\n' (LF). EOF also terminates an otherwise unterminated line.
- *
- * @param str
- * the object to store the given line (without newline)
- * @param maxLineLength
- * the maximum number of bytes to store into str; the rest of the line is silently discarded.
- * @param maxBytesToConsume
- * the maximum number of bytes to consume in this call. This is only a hint, because if the line crosses this threshold, we allow it to happen. It can
- * potentially overshoot by as much as one buffer length.
- *
- * @return the number of bytes read including the (longest) newline found.
- *
- * @throws IOException
- * if the underlying stream throws
- */
- public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
- /*
- * We're reading data from in, but the head of the stream may already be buffered in buffer, so we have two cases: 1. No newline characters are in the
- * buffer, so we need to copy everything and read another buffer from the stream. 2. An unambiguously terminated line is in the buffer, so we just copy it to str.
- */
- str.clear();
- int txtLength = 0; // tracks str.getLength(), as an optimization
- int newlineLength = 0; // length of terminating newline
- long bytesConsumed = 0;
- do {
- int startPosn = bufferPosn; // starting from where we left off the last time
- if (bufferPosn >= bufferLength) {
- startPosn = bufferPosn = 0;
- bufferLength = in.read(buffer);
- if (bufferLength <= 0)
- break; // EOF
- }
- for (; bufferPosn < bufferLength; ++bufferPosn) { // search for newline
- if (buffer[bufferPosn] == LF) {
- newlineLength = 1;
- ++bufferPosn; // at next invocation proceed from following byte
- break;
- }
- }
- int readLength = bufferPosn - startPosn;
- bytesConsumed += readLength;
- int appendLength = readLength - newlineLength;
- if (appendLength > maxLineLength - txtLength) {
- appendLength = maxLineLength - txtLength;
- }
- if (appendLength > 0) {
- str.append(buffer, startPosn, appendLength);
- txtLength += appendLength;
- }
- } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);
-
- if (bytesConsumed > Integer.MAX_VALUE)
- throw new IOException("Too many bytes before newline: " + bytesConsumed);
- return (int) bytesConsumed;
- }
-
- /**
- * Read from the InputStream into the given Text.
- *
- * @param str
- * the object to store the given line
- * @param maxLineLength
- * the maximum number of bytes to store into str.
- * @return the number of bytes read including the newline
- * @throws IOException
- * if the underlying stream throws
- */
- public int readLine(Text str, int maxLineLength) throws IOException {
- return readLine(str, maxLineLength, Integer.MAX_VALUE);
- }
-
- /**
- * Read from the InputStream into the given Text.
- *
- * @param str
- * the object to store the given line
- * @return the number of bytes read including the newline
- * @throws IOException
- * if the underlying stream throws
- */
- public int readLine(Text str) throws IOException {
- return readLine(str, Integer.MAX_VALUE, Integer.MAX_VALUE);
- }
-
-}
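
A minimal usage sketch for the LF-only line reader above, assuming a local file supplies the input stream:

    import java.io.FileInputStream;

    import org.apache.accumulo.examples.wikisearch.reader.LfLineReader;
    import org.apache.hadoop.io.Text;

    public class LfLineReaderExample {
      public static void main(String[] args) throws Exception {
        LfLineReader in = new LfLineReader(new FileInputStream(args[0]));
        try {
          Text line = new Text();
          // readLine() returns the bytes consumed including the '\n';
          // 0 means EOF with no remaining data.
          while (in.readLine(line) > 0) {
            System.out.println(line);
          }
        } finally {
          in.close();
        }
      }
    }
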
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LongLineRecordReader.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LongLineRecordReader.java b/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LongLineRecordReader.java
deleted file mode 100644
index f36c373..0000000
--- a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LongLineRecordReader.java
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.reader;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.compress.CompressionCodec;
-import org.apache.hadoop.io.compress.CompressionCodecFactory;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
-import org.apache.hadoop.util.LineReader;
-
-/**
- * A copy of {@link LineRecordReader} which does not discard lines longer than "mapred.linerecordreader.maxlength". Instead, it returns them, leaving it to the
- * mapper to decide what to do with them. It also does not treat '\r' (CR) characters as newlines -- it uses {@link LfLineReader} instead of {@link LineReader}
- * to read lines.
- */
-public class LongLineRecordReader extends RecordReader<LongWritable,Text> {
- private CompressionCodecFactory compressionCodecs = null;
- private long start;
- private long pos;
- private long end;
- private LfLineReader in;
- private int maxLineLength;
- private LongWritable key = null;
- private Text value = null;
-
- @Override
- public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
- FileSplit split = (FileSplit) genericSplit;
- Configuration job = context.getConfiguration();
- this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
- start = split.getStart();
- end = start + split.getLength();
- final Path file = split.getPath();
- compressionCodecs = new CompressionCodecFactory(job);
- final CompressionCodec codec = compressionCodecs.getCodec(file);
-
- // open the file and seek to the start of the split
- FileSystem fs = file.getFileSystem(job);
- FSDataInputStream fileIn = fs.open(split.getPath());
- boolean skipFirstLine = false;
- if (codec != null) {
- in = new LfLineReader(codec.createInputStream(fileIn), job);
- end = Long.MAX_VALUE;
- } else {
- if (start != 0) {
- skipFirstLine = true;
- --start;
- fileIn.seek(start);
- }
- in = new LfLineReader(fileIn, job);
- }
- if (skipFirstLine) { // skip first line and re-establish "start".
- start += in.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
- }
- this.pos = start;
- }
-
- @Override
- public boolean nextKeyValue() throws IOException {
- if (key == null) {
- key = new LongWritable();
- }
- key.set(pos);
- if (value == null) {
- value = new Text();
- }
- int newSize = 0;
- if (pos < end) {
- newSize = in.readLine(value, maxLineLength, Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
- if (newSize != 0) {
- pos += newSize;
- }
- }
- if (newSize == 0) {
- key = null;
- value = null;
- return false;
- } else {
- return true;
- }
- }
-
- @Override
- public LongWritable getCurrentKey() {
- return key;
- }
-
- @Override
- public Text getCurrentValue() {
- return value;
- }
-
- /**
- * Get the progress within the split
- */
- @Override
- public float getProgress() {
- if (start == end) {
- return 0.0f;
- } else {
- return Math.min(1.0f, (pos - start) / (float) (end - start));
- }
- }
-
- @Override
- public synchronized void close() throws IOException {
- if (in != null) {
- in.close();
- }
- }
-}
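
The class above follows the standard mapreduce RecordReader contract, so consumption looks like any other reader. A sketch, assuming the InputSplit and TaskAttemptContext come from the framework or from a test harness such as the ContextFactory used in WikipediaMapperTest below:

    import java.io.IOException;

    import org.apache.accumulo.examples.wikisearch.reader.LongLineRecordReader;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;

    public class LongLineRecordReaderExample {
      // split and context are assumed to be supplied by the framework.
      static void readAll(InputSplit split, TaskAttemptContext context) throws IOException {
        LongLineRecordReader rr = new LongLineRecordReader();
        rr.initialize(split, context);
        try {
          while (rr.nextKeyValue()) {
            LongWritable offset = rr.getCurrentKey(); // byte offset of the line
            Text line = rr.getCurrentValue();         // line contents, no trailing LF
            System.out.println(offset + "\t" + line);
          }
        } finally {
          rr.close();
        }
      }
    }
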
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java b/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java
deleted file mode 100644
index 1623d55..0000000
--- a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.util;
-
-import java.nio.ByteBuffer;
-import java.nio.charset.CharacterCodingException;
-
-import org.apache.accumulo.core.iterators.user.SummingCombiner;
-import org.apache.hadoop.io.Text;
-
-public class TextUtil {
-
- /**
- * Appends a null byte followed by the UTF-8 bytes of the given string to the given {@link Text}
- *
- * @param text
- * the Text to which to append
- * @param string
- * the String to append
- */
- public static void textAppend(Text text, String string) {
- appendNullByte(text);
- textAppendNoNull(text, string);
- }
-
- public static void textAppend(Text text, String string, boolean replaceBadChar) {
- appendNullByte(text);
- textAppendNoNull(text, string, replaceBadChar);
- }
-
- public static void textAppend(Text t, long s) {
- t.append(nullByte, 0, 1);
- t.append(SummingCombiner.FIXED_LEN_ENCODER.encode(s), 0, 8);
- }
-
- private static final byte[] nullByte = {0};
-
- /**
- * Appends a null byte to the given text
- *
- * @param text
- * the text to which to append the null byte
- */
- public static void appendNullByte(Text text) {
- text.append(nullByte, 0, nullByte.length);
- }
-
- /**
- * Appends the UTF-8 bytes of the given string to the given {@link Text}
- *
- * @param t
- * the Text to which to append
- * @param s
- * the String to append
- */
- public static void textAppendNoNull(Text t, String s) {
- textAppendNoNull(t, s, false);
- }
-
- /**
- * Appends the UTF-8 bytes of the given string to the given {@link Text}
- *
- * @param t
- * the Text to which to append
- * @param s
- * the String to append
- * @param replaceBadChar
- * whether to replace malformed input rather than throw an exception
- */
- public static void textAppendNoNull(Text t, String s, boolean replaceBadChar) {
- try {
- ByteBuffer buffer = Text.encode(s, replaceBadChar);
- t.append(buffer.array(), 0, buffer.limit());
- } catch (CharacterCodingException cce) {
- throw new IllegalArgumentException(cce);
- }
- }
-
- /**
- * Converts the given string to its UTF-8 bytes. This uses Hadoop's method for converting a string to UTF-8 and is much faster than calling
- * {@link String#getBytes(String)}.
- *
- * @param string
- * the string to convert
- * @return the UTF-8 representation of the string
- */
- public static byte[] toUtf8(String string) {
- ByteBuffer buffer;
- try {
- buffer = Text.encode(string, false);
- } catch (CharacterCodingException cce) {
- throw new IllegalArgumentException(cce);
- }
- byte[] bytes = new byte[buffer.limit()];
- System.arraycopy(buffer.array(), 0, bytes, 0, bytes.length);
- return bytes;
- }
-}
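
The helpers above compose null-byte-delimited Text keys; a small sketch of the typical pattern (the field name and value here are hypothetical):

    import org.apache.accumulo.examples.wikisearch.util.TextUtil;
    import org.apache.hadoop.io.Text;

    public class TextUtilExample {
      public static void main(String[] args) {
        Text key = new Text("fieldName"); // hypothetical starting value
        // Appends a '\0' separator followed by the UTF-8 bytes of the string,
        // yielding "fieldName\0fieldValue".
        TextUtil.textAppend(key, "fieldValue");
        System.out.println(key.getLength()); // 20 bytes
      }
    }
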
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/protobuf/TermWeight.proto
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/main/protobuf/TermWeight.proto b/src/examples/wikisearch/ingest/src/main/protobuf/TermWeight.proto
deleted file mode 100644
index 41ae188..0000000
--- a/src/examples/wikisearch/ingest/src/main/protobuf/TermWeight.proto
+++ /dev/null
@@ -1,28 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one or more
-// contributor license agreements. See the NOTICE file distributed with
-// this work for additional information regarding copyright ownership.
-// The ASF licenses this file to You under the Apache License, Version 2.0
-// (the "License"); you may not use this file except in compliance with
-// the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// compile with protoc --java_out ../java
-// compile extra builder util with java accumulo.data.protobuf.builder.ProtoBufBuilder -d ../java accumulo.data.protobuf.UidList
-// classpath for compile command should include ../../../target/classes and protobuf-java-2.2.0.jar
-
-package protobuf;
-
-option java_package = "protobuf";
-option optimize_for = SPEED;
-
-message Info {
- required float normalizedTermFrequency = 1;
- repeated uint32 wordOffset = 2;
-}
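
The generated TermWeight.java is not part of this diff, but under the standard protoc 2.x naming the Info message above would be built as in this sketch (the method names are assumptions derived from the codegen conventions, mirroring Uid.java earlier in this patch; the import follows the java_package option above):

    import protobuf.TermWeight; // per "option java_package" in the .proto

    public class TermWeightExample {
      public static void main(String[] args) {
        // normalizedTermFrequency is required; wordOffset is repeated.
        TermWeight.Info info = TermWeight.Info.newBuilder()
            .setNormalizedTermFrequency(0.0017f) // hypothetical value
            .addWordOffset(11)
            .addWordOffset(97)
            .build();
        byte[] bytes = info.toByteArray(); // serialized for storage in a Value
        System.out.println(bytes.length);
      }
    }
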
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/protobuf/Uid.proto
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/main/protobuf/Uid.proto b/src/examples/wikisearch/ingest/src/main/protobuf/Uid.proto
deleted file mode 100644
index 30aa446..0000000
--- a/src/examples/wikisearch/ingest/src/main/protobuf/Uid.proto
+++ /dev/null
@@ -1,29 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one or more
-// contributor license agreements. See the NOTICE file distributed with
-// this work for additional information regarding copyright ownership.
-// The ASF licenses this file to You under the Apache License, Version 2.0
-// (the "License"); you may not use this file except in compliance with
-// the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// compile with protoc --java_out ../java
-// compile extra builder util with java accumulo.data.protobuf.builder.ProtoBufBuilder -d ../java accumulo.data.protobuf.UidList
-// classpath for compile command should include ../../../target/classes and protobuf-java-2.2.0.jar
-
-package protobuf;
-
-option java_package = "protobuf";
-option optimize_for = SPEED;
-
-message List {
- required bool IGNORE = 1;
- required uint64 COUNT = 2;
- repeated string UID = 3;
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/protobuf/compile_protos.sh
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/main/protobuf/compile_protos.sh b/src/examples/wikisearch/ingest/src/main/protobuf/compile_protos.sh
deleted file mode 100755
index 6702998..0000000
--- a/src/examples/wikisearch/ingest/src/main/protobuf/compile_protos.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-for PROTO in *.proto; do protoc --java_out ../java "$PROTO"; done
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java b/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java
deleted file mode 100644
index 6af1e9b..0000000
--- a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.ingest;
-
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.Counters;
-import org.apache.hadoop.mapreduce.StatusReporter;
-
-public class StandaloneStatusReporter extends StatusReporter {
-
- private Counters c = new Counters();
-
- private long filesProcessed = 0;
- private long recordsProcessed = 0;
-
- public Counters getCounters() {
- return c;
- }
-
- @Override
- public Counter getCounter(Enum<?> name) {
- return c.findCounter(name);
- }
-
- @Override
- public Counter getCounter(String group, String name) {
- return c.findCounter(group, name);
- }
-
- @Override
- public void progress() {
- // do nothing
- }
-
- @Override
- public void setStatus(String status) {
- // do nothing
- }
-
- public long getFilesProcessed() {
- return filesProcessed;
- }
-
- public long getRecordsProcessed() {
- return recordsProcessed;
- }
-
- public void incrementFilesProcessed() {
- filesProcessed++;
- recordsProcessed = 0;
- }
-
- public void incrementRecordsProcessed() {
- recordsProcessed++;
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java b/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java
deleted file mode 100644
index f6b2791..0000000
--- a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.ingest;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInput;
-import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
-
-import junit.framework.Assert;
-
-import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.junit.Test;
-
-public class WikipediaInputSplitTest {
- @Test
- public void testSerialization() throws IOException {
- Path testPath = new Path("/foo/bar");
- String[] hosts = new String[2];
- hosts[0] = "abcd";
- hosts[1] = "efgh";
- FileSplit fSplit = new FileSplit(testPath, 1, 2, hosts);
- WikipediaInputSplit split = new WikipediaInputSplit(fSplit, 7);
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- ObjectOutputStream out = new ObjectOutputStream(baos);
- split.write(out);
- out.close();
- baos.close();
-
- ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
- DataInput in = new ObjectInputStream(bais);
-
- WikipediaInputSplit split2 = new WikipediaInputSplit();
- split2.readFields(in);
- Assert.assertTrue(bais.available() == 0);
- bais.close();
-
- Assert.assertTrue(split.getPartition() == split2.getPartition());
-
- FileSplit fSplit2 = split2.getFileSplit();
- Assert.assertTrue(fSplit.getPath().equals(fSplit2.getPath()));
- Assert.assertTrue(fSplit.getStart() == fSplit2.getStart());
- Assert.assertTrue(fSplit.getLength() == fSplit2.getLength());
-
- String[] hosts2 = fSplit2.getLocations();
- Assert.assertEquals(hosts.length, hosts2.length);
- for (int i = 0; i < hosts.length; i++) {
- Assert.assertEquals(hosts[i], hosts2[i]);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java b/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java
deleted file mode 100644
index c659ec4..0000000
--- a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.ingest;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URL;
-import java.util.HashMap;
-import java.util.Map.Entry;
-
-import junit.framework.Assert;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.MutationsRejectedException;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.util.ContextFactory;
-import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RawLocalFileSystem;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
-import org.junit.Before;
-
-/**
- * Load some data into mock Accumulo
- */
-public class WikipediaMapperTest {
-
- private static final String METADATA_TABLE_NAME = "wikiMetadata";
-
- private static final String TABLE_NAME = "wiki";
-
- private static final String INDEX_TABLE_NAME = "wikiIndex";
-
- private static final String RINDEX_TABLE_NAME = "wikiReverseIndex";
-
- private class MockAccumuloRecordWriter extends RecordWriter<Text,Mutation> {
- @Override
- public void write(Text key, Mutation value) throws IOException, InterruptedException {
- try {
- writerMap.get(key).addMutation(value);
- } catch (MutationsRejectedException e) {
- throw new IOException("Error adding mutation", e);
- }
- }
-
- @Override
- public void close(TaskAttemptContext context) throws IOException, InterruptedException {
- try {
- for (BatchWriter w : writerMap.values()) {
- w.flush();
- w.close();
- }
- } catch (MutationsRejectedException e) {
- throw new IOException("Error closing Batch Writer", e);
- }
- }
-
- }
-
- private Connector c = null;
- private Configuration conf = new Configuration();
- private HashMap<Text,BatchWriter> writerMap = new HashMap<Text,BatchWriter>();
-
- @Before
- public void setup() throws Exception {
-
- conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
- conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
- conf.set(WikipediaConfiguration.TABLE_NAME, TABLE_NAME);
- conf.set(WikipediaConfiguration.NUM_PARTITIONS, "1");
- conf.set(WikipediaConfiguration.NUM_GROUPS, "1");
-
- MockInstance i = new MockInstance();
- c = i.getConnector("root", "pass");
- c.tableOperations().delete(METADATA_TABLE_NAME);
- c.tableOperations().delete(TABLE_NAME);
- c.tableOperations().delete(INDEX_TABLE_NAME);
- c.tableOperations().delete(RINDEX_TABLE_NAME);
- c.tableOperations().create(METADATA_TABLE_NAME);
- c.tableOperations().create(TABLE_NAME);
- c.tableOperations().create(INDEX_TABLE_NAME);
- c.tableOperations().create(RINDEX_TABLE_NAME);
-
- writerMap.put(new Text(METADATA_TABLE_NAME), c.createBatchWriter(METADATA_TABLE_NAME, 1000L, 1000L, 1));
- writerMap.put(new Text(TABLE_NAME), c.createBatchWriter(TABLE_NAME, 1000L, 1000L, 1));
- writerMap.put(new Text(INDEX_TABLE_NAME), c.createBatchWriter(INDEX_TABLE_NAME, 1000L, 1000L, 1));
- writerMap.put(new Text(RINDEX_TABLE_NAME), c.createBatchWriter(RINDEX_TABLE_NAME, 1000L, 1000L, 1));
-
- TaskAttemptContext context = ContextFactory.createTaskAttemptContext(conf);
-
- RawLocalFileSystem fs = new RawLocalFileSystem();
- fs.setConf(conf);
-
- URL url = ClassLoader.getSystemResource("enwiki-20110901-001.xml");
- Assert.assertNotNull(url);
- File data = new File(url.toURI());
- Path tmpFile = new Path(data.getAbsolutePath());
-
- // Setup the Mapper
- InputSplit split = new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null);
- AggregatingRecordReader rr = new AggregatingRecordReader();
- Path ocPath = new Path(tmpFile, "oc");
- OutputCommitter oc = new FileOutputCommitter(ocPath, context);
- fs.deleteOnExit(ocPath);
- StandaloneStatusReporter sr = new StandaloneStatusReporter();
- rr.initialize(split, context);
- MockAccumuloRecordWriter rw = new MockAccumuloRecordWriter();
- WikipediaMapper mapper = new WikipediaMapper();
-
- // Load data into Mock Accumulo
- Mapper<LongWritable,Text,Text,Mutation>.Context con = ContextFactory.createMapContext(mapper, context, rr, rw, oc, sr, split);
- mapper.run(con);
-
- // Flush and close record writers.
- rw.close(context);
-
- }
-
- private void debugQuery(String tableName) throws Exception {
- Scanner s = c.createScanner(tableName, new Authorizations("all"));
- Range r = new Range();
- s.setRange(r);
- for (Entry<Key,Value> entry : s)
- System.out.println(entry.getKey().toString() + " " + entry.getValue().toString());
- }
-
- public void testViewAllData() throws Exception {
- debugQuery(METADATA_TABLE_NAME);
- debugQuery(TABLE_NAME);
- debugQuery(INDEX_TABLE_NAME);
- debugQuery(RINDEX_TABLE_NAME);
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java b/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java
deleted file mode 100644
index 6619ede..0000000
--- a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.iterator;
-
-import static org.junit.Assert.assertTrue;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.UUID;
-
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.iterators.Combiner;
-import org.apache.accumulo.examples.wikisearch.protobuf.Uid;
-import org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.Builder;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-import org.junit.Before;
-import org.junit.Test;
-
-public class GlobalIndexUidTest {
- private GlobalIndexUidCombiner combiner;
- private List<Value> values;
-
- @Before
- public void setup() throws Exception {
- combiner = new GlobalIndexUidCombiner();
- combiner.init(null, Collections.singletonMap("all", "true"), null);
- values = new ArrayList<Value>();
- }
-
- private Uid.List.Builder createNewUidList() {
- return Uid.List.newBuilder();
- }
-
- @Test
- public void testSingleUid() {
- Builder b = createNewUidList();
- b.setCOUNT(1);
- b.setIGNORE(false);
- b.addUID(UUID.randomUUID().toString());
- Uid.List uidList = b.build();
- Value val = new Value(uidList.toByteArray());
- values.add(val);
- Value result = combiner.reduce(new Key(), values.iterator());
- assertTrue(val.compareTo(result.get()) == 0);
- }
-
- @Test
- public void testLessThanMax() throws Exception {
- List<String> savedUUIDs = new ArrayList<String>();
- for (int i = 0; i < GlobalIndexUidCombiner.MAX - 1; i++) {
- Builder b = createNewUidList();
- b.setIGNORE(false);
- String uuid = UUID.randomUUID().toString();
- savedUUIDs.add(uuid);
- b.setCOUNT(i);
- b.addUID(uuid);
- Uid.List uidList = b.build();
- Value val = new Value(uidList.toByteArray());
- values.add(val);
- }
- Value result = combiner.reduce(new Key(), values.iterator());
- Uid.List resultList = Uid.List.parseFrom(result.get());
- assertTrue(resultList.getIGNORE() == false);
- assertTrue(resultList.getUIDCount() == (GlobalIndexUidCombiner.MAX - 1));
- List<String> resultListUUIDs = resultList.getUIDList();
- for (String s : savedUUIDs)
- assertTrue(resultListUUIDs.contains(s));
- }
-
- @Test
- public void testEqualsMax() throws Exception {
- List<String> savedUUIDs = new ArrayList<String>();
- for (int i = 0; i < GlobalIndexUidCombiner.MAX; i++) {
- Builder b = createNewUidList();
- b.setIGNORE(false);
- String uuid = UUID.randomUUID().toString();
- savedUUIDs.add(uuid);
- b.setCOUNT(i);
- b.addUID(uuid);
- Uid.List uidList = b.build();
- Value val = new Value(uidList.toByteArray());
- values.add(val);
- }
- Value result = combiner.reduce(new Key(), values.iterator());
- Uid.List resultList = Uid.List.parseFrom(result.get());
- assertTrue(resultList.getIGNORE() == false);
- assertTrue(resultList.getUIDCount() == (GlobalIndexUidCombiner.MAX));
- List<String> resultListUUIDs = resultList.getUIDList();
- for (String s : savedUUIDs)
- assertTrue(resultListUUIDs.contains(s));
- }
-
- @Test
- public void testMoreThanMax() throws Exception {
- List<String> savedUUIDs = new ArrayList<String>();
- for (int i = 0; i < GlobalIndexUidCombiner.MAX + 10; i++) {
- Builder b = createNewUidList();
- b.setIGNORE(false);
- String uuid = UUID.randomUUID().toString();
- savedUUIDs.add(uuid);
- b.setCOUNT(1);
- b.addUID(uuid);
- Uid.List uidList = b.build();
- Value val = new Value(uidList.toByteArray());
- values.add(val);
- }
- Value result = combiner.reduce(new Key(), values.iterator());
- Uid.List resultList = Uid.List.parseFrom(result.get());
- assertTrue(resultList.getIGNORE() == true);
- assertTrue(resultList.getUIDCount() == 0);
- assertTrue(resultList.getCOUNT() == (GlobalIndexUidCombiner.MAX + 10));
- }
-
- @Test
- public void testSeenIgnore() throws Exception {
- Builder b = createNewUidList();
- b.setIGNORE(true);
- b.setCOUNT(0);
- Uid.List uidList = b.build();
- Value val = new Value(uidList.toByteArray());
- values.add(val);
- b = createNewUidList();
- b.setIGNORE(false);
- b.setCOUNT(1);
- b.addUID(UUID.randomUUID().toString());
- uidList = b.build();
- val = new Value(uidList.toByteArray());
- values.add(val);
- Value result = combiner.reduce(new Key(), values.iterator());
- Uid.List resultList = Uid.List.parseFrom(result.get());
- assertTrue(resultList.getIGNORE() == true);
- assertTrue(resultList.getUIDCount() == 0);
- assertTrue(resultList.getCOUNT() == 1);
- }
-
- @Test
- public void testInvalidValueType() throws Exception {
- Combiner comb = new GlobalIndexUidCombiner();
- IteratorSetting setting = new IteratorSetting(1, GlobalIndexUidCombiner.class);
- GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
- GlobalIndexUidCombiner.setLossyness(setting, true);
- comb.init(null, setting.getOptions(), null);
- Logger.getLogger(GlobalIndexUidCombiner.class).setLevel(Level.OFF);
- Value val = new Value(UUID.randomUUID().toString().getBytes());
- values.add(val);
- Value result = comb.reduce(new Key(), values.iterator());
- Uid.List resultList = Uid.List.parseFrom(result.get());
- assertTrue(resultList.getIGNORE() == false);
- assertTrue(resultList.getUIDCount() == 0);
- assertTrue(resultList.getCOUNT() == 0);
- }
-
- @Test
- public void testCount() throws Exception {
- UUID uuid = UUID.randomUUID();
- // Collect the same UUID five times.
- for (int i = 0; i < 5; i++) {
- Builder b = createNewUidList();
- b.setCOUNT(1);
- b.setIGNORE(false);
- b.addUID(uuid.toString());
- Uid.List uidList = b.build();
- Value val = new Value(uidList.toByteArray());
- values.add(val);
- }
- Value result = combiner.reduce(new Key(), values.iterator());
- Uid.List resultList = Uid.List.parseFrom(result.get());
- assertTrue(resultList.getIGNORE() == false);
- assertTrue(resultList.getUIDCount() == 1);
- assertTrue(resultList.getCOUNT() == 5);
-
- }
-
-}
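
Taken together, the assertions deleted above pin down GlobalIndexUidCombiner's
contract: UID lists are unioned and counts summed until more than
GlobalIndexUidCombiner.MAX distinct UIDs accumulate (or an already-IGNORE'd
value is seen), after which only the count survives, and undecodable values
are dropped when the combiner is lossy. A hedged sketch of a reduce()
consistent with those assertions follows; the removed implementation may have
differed in detail, and the value of MAX here is an assumption:

    import java.util.HashSet;
    import java.util.Iterator;
    import java.util.Set;

    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.data.Value;
    import org.apache.accumulo.core.iterators.Combiner;
    import org.apache.accumulo.examples.wikisearch.protobuf.Uid;

    import com.google.protobuf.InvalidProtocolBufferException;

    public class UidListCombinerSketch extends Combiner {
      // Assumed cap; the real constant lived in GlobalIndexUidCombiner.
      static final int MAX = 20;

      @Override
      public Value reduce(Key key, Iterator<Value> iter) {
        long count = 0;
        boolean ignore = false;
        Set<String> uids = new HashSet<String>();
        while (iter.hasNext()) {
          try {
            Uid.List list = Uid.List.parseFrom(iter.next().get());
            count += list.getCOUNT();
            ignore |= list.getIGNORE();
            uids.addAll(list.getUIDList());
          } catch (InvalidProtocolBufferException e) {
            // testInvalidValueType: a lossy combiner skips undecodable values.
          }
        }
        Uid.List.Builder b = Uid.List.newBuilder().setCOUNT(count);
        if (ignore || uids.size() > MAX) {
          b.setIGNORE(true); // past the cap: keep only the count
        } else {
          b.setIGNORE(false);
          for (String uid : uids)
            b.addUID(uid);
        }
        return new Value(b.build().toByteArray());
      }
    }
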
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java b/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
deleted file mode 100644
index 7297b5a..0000000
--- a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.iterator;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-import junit.framework.Assert;
-
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.examples.wikisearch.protobuf.TermWeight;
-import org.apache.accumulo.examples.wikisearch.protobuf.TermWeight.Info.Builder;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
-import com.google.protobuf.InvalidProtocolBufferException;
-
-public class TextIndexTest {
- private TextIndexCombiner combiner;
- private List<Value> values;
-
- @Before
- public void setup() throws Exception {
- combiner = new TextIndexCombiner();
- combiner.init(null, Collections.singletonMap("all", "true"), null);
- values = new ArrayList<Value>();
- }
-
- @After
- public void cleanup() {
-
- }
-
- private TermWeight.Info.Builder createBuilder() {
- return TermWeight.Info.newBuilder();
- }
-
- @Test
- public void testSingleValue() throws InvalidProtocolBufferException {
- Builder builder = createBuilder();
- builder.addWordOffset(1);
- builder.addWordOffset(5);
- builder.setNormalizedTermFrequency(0.1f);
-
- values.add(new Value(builder.build().toByteArray()));
-
- Value result = combiner.reduce(new Key(), values.iterator());
-
- TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
-
- Assert.assertTrue(info.getNormalizedTermFrequency() == 0.1f);
-
- List<Integer> offsets = info.getWordOffsetList();
- Assert.assertTrue(offsets.size() == 2);
- Assert.assertTrue(offsets.get(0) == 1);
- Assert.assertTrue(offsets.get(1) == 5);
- }
-
- @Test
- public void testAggregateTwoValues() throws InvalidProtocolBufferException {
- Builder builder = createBuilder();
- builder.addWordOffset(1);
- builder.addWordOffset(5);
- builder.setNormalizedTermFrequency(0.1f);
-
- values.add(new Value(builder.build().toByteArray()));
-
- builder = createBuilder();
- builder.addWordOffset(3);
- builder.setNormalizedTermFrequency(0.05f);
-
- values.add(new Value(builder.build().toByteArray()));
-
- Value result = combiner.reduce(new Key(), values.iterator());
-
- TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
-
- Assert.assertTrue(info.getNormalizedTermFrequency() == 0.15f);
-
- List<Integer> offsets = info.getWordOffsetList();
- Assert.assertTrue(offsets.size() == 3);
- Assert.assertTrue(offsets.get(0) == 1);
- Assert.assertTrue(offsets.get(1) == 3);
- Assert.assertTrue(offsets.get(2) == 5);
- }
-
- @Test
- public void testAggregateManyValues() throws InvalidProtocolBufferException {
- Builder builder = createBuilder();
- builder.addWordOffset(13);
- builder.addWordOffset(15);
- builder.addWordOffset(19);
- builder.setNormalizedTermFrequency(0.12f);
-
- values.add(new Value(builder.build().toByteArray()));
-
- builder = createBuilder();
- builder.addWordOffset(1);
- builder.addWordOffset(5);
- builder.setNormalizedTermFrequency(0.1f);
-
- values.add(new Value(builder.build().toByteArray()));
-
- builder = createBuilder();
- builder.addWordOffset(3);
- builder.setNormalizedTermFrequency(0.05f);
-
- values.add(new Value(builder.build().toByteArray()));
-
- Value result = combiner.reduce(new Key(), values.iterator());
-
- TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
-
- Assert.assertTrue(info.getNormalizedTermFrequency() == 0.27f);
-
- List<Integer> offsets = info.getWordOffsetList();
- Assert.assertTrue(offsets.size() == 6);
- Assert.assertTrue(offsets.get(0) == 1);
- Assert.assertTrue(offsets.get(1) == 3);
- Assert.assertTrue(offsets.get(2) == 5);
- Assert.assertTrue(offsets.get(3) == 13);
- Assert.assertTrue(offsets.get(4) == 15);
- Assert.assertTrue(offsets.get(5) == 19);
- }
-
- @Test
- public void testEmptyValue() throws InvalidProtocolBufferException {
- Builder builder = createBuilder();
- builder.addWordOffset(13);
- builder.addWordOffset(15);
- builder.addWordOffset(19);
- builder.setNormalizedTermFrequency(0.12f);
-
- values.add(new Value("".getBytes()));
- values.add(new Value(builder.build().toByteArray()));
- values.add(new Value("".getBytes()));
-
- builder = createBuilder();
- builder.addWordOffset(1);
- builder.addWordOffset(5);
- builder.setNormalizedTermFrequency(0.1f);
-
- values.add(new Value(builder.build().toByteArray()));
- values.add(new Value("".getBytes()));
-
- builder = createBuilder();
- builder.addWordOffset(3);
- builder.setNormalizedTermFrequency(0.05f);
-
- values.add(new Value(builder.build().toByteArray()));
- values.add(new Value("".getBytes()));
-
- Value result = combiner.reduce(new Key(), values.iterator());
-
- TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
-
- Assert.assertTrue(info.getNormalizedTermFrequency() == 0.27f);
-
- List<Integer> offsets = info.getWordOffsetList();
- Assert.assertTrue(offsets.size() == 6);
- Assert.assertTrue(offsets.get(0) == 1);
- Assert.assertTrue(offsets.get(1) == 3);
- Assert.assertTrue(offsets.get(2) == 5);
- Assert.assertTrue(offsets.get(3) == 13);
- Assert.assertTrue(offsets.get(4) == 15);
- Assert.assertTrue(offsets.get(5) == 19);
- }
-}
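
The tests deleted above fix the analogous contract for TextIndexCombiner: word
offsets from every decodable TermWeight.Info value are merged into one
ascending list, the normalized term frequencies are summed, and zero-length
values are skipped. Under the same caveats as the previous sketch (this
mirrors the assertions, not necessarily the removed code):

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Iterator;
    import java.util.List;

    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.data.Value;
    import org.apache.accumulo.core.iterators.Combiner;
    import org.apache.accumulo.examples.wikisearch.protobuf.TermWeight;

    import com.google.protobuf.InvalidProtocolBufferException;

    public class TermWeightCombinerSketch extends Combiner {
      @Override
      public Value reduce(Key key, Iterator<Value> iter) {
        List<Integer> offsets = new ArrayList<Integer>();
        float tf = 0.0f;
        while (iter.hasNext()) {
          byte[] bytes = iter.next().get();
          if (bytes.length == 0)
            continue; // testEmptyValue: blank values are skipped
          try {
            TermWeight.Info info = TermWeight.Info.parseFrom(bytes);
            tf += info.getNormalizedTermFrequency();
            offsets.addAll(info.getWordOffsetList());
          } catch (InvalidProtocolBufferException e) {
            // lossy: skip undecodable values
          }
        }
        Collections.sort(offsets); // tests assert ascending offset order
        TermWeight.Info.Builder b =
            TermWeight.Info.newBuilder().setNormalizedTermFrequency(tf);
        for (int offset : offsets)
          b.addWordOffset(offset);
        return new Value(b.build().toByteArray());
      }
    }
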
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/normalizer/testNumberNormalizer.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/normalizer/testNumberNormalizer.java b/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/normalizer/testNumberNormalizer.java
deleted file mode 100644
index 470633c..0000000
--- a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/normalizer/testNumberNormalizer.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.normalizer;
-
-import static org.junit.Assert.assertTrue;
-
-import org.apache.accumulo.examples.wikisearch.normalizer.NumberNormalizer;
-import org.junit.Test;
-
-public class testNumberNormalizer {
-
- @Test
- public void test1() throws Exception {
- NumberNormalizer nn = new NumberNormalizer();
-
- String n1 = nn.normalizeFieldValue(null, "1");
- String n2 = nn.normalizeFieldValue(null, "1.00000000");
-
- assertTrue(n1.compareTo(n2) < 0);
-
- }
-
- @Test
- public void test2() {
- NumberNormalizer nn = new NumberNormalizer();
-
- String n1 = nn.normalizeFieldValue(null, "-1.0");
- String n2 = nn.normalizeFieldValue(null, "1.0");
-
- assertTrue(n1.compareTo(n2) < 0);
-
- }
-
- @Test
- public void test3() {
- NumberNormalizer nn = new NumberNormalizer();
- String n1 = nn.normalizeFieldValue(null, "-0.0001");
- String n2 = nn.normalizeFieldValue(null, "0");
- String n3 = nn.normalizeFieldValue(null, "0.00001");
-
- assertTrue((n1.compareTo(n2) < 0) && (n2.compareTo(n3) < 0));
- }
-
- @Test
- public void test4() {
- NumberNormalizer nn = new NumberNormalizer();
- String nn1 = nn.normalizeFieldValue(null, Integer.toString(Integer.MAX_VALUE));
- String nn2 = nn.normalizeFieldValue(null, Integer.toString(Integer.MAX_VALUE - 1));
-
- assertTrue((nn2.compareTo(nn1) < 0));
-
- }
-
- @Test
- public void test5() {
- NumberNormalizer nn = new NumberNormalizer();
- String nn1 = nn.normalizeFieldValue(null, "-0.001");
- String nn2 = nn.normalizeFieldValue(null, "-0.0009");
- String nn3 = nn.normalizeFieldValue(null, "-0.00090");
-
- assertTrue((nn3.compareTo(nn2) == 0) && (nn2.compareTo(nn1) > 0));
-
- }
-
- @Test
- public void test6() {
- NumberNormalizer nn = new NumberNormalizer();
- String nn1 = nn.normalizeFieldValue(null, "00.0");
- String nn2 = nn.normalizeFieldValue(null, "0");
- String nn3 = nn.normalizeFieldValue(null, "0.0");
-
- assertTrue((nn3.compareTo(nn2) == 0) && (nn2.compareTo(nn1) == 0));
-
- }
-
-}
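
The normalizer tests deleted above encode a single property: the strings
NumberNormalizer returns must sort lexicographically in numeric order, with
numerically equal inputs ("0", "0.0", "00.0") collapsing to one encoding.
Plain decimal strings lack that property, which is why the wikisearch index
normalized numbers before using them as index terms. A tiny self-contained
illustration of the problem being solved:

    public class SortOrderDemo {
      public static void main(String[] args) {
        String a = "10", b = "9";
        // Numerically 10 > 9 ...
        System.out.println(Integer.parseInt(a) > Integer.parseInt(b)); // true
        // ... but as plain strings "10" sorts before "9".
        System.out.println(a.compareTo(b) < 0); // true
        // Per the tests above, NumberNormalizer's encodings restore numeric
        // order: norm("9") < norm("10"), and norm("0") equals norm("0.0").
      }
    }
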