You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by ct...@apache.org on 2019/06/05 20:59:41 UTC
[accumulo-wikisearch] branch master updated: Build against Accumulo
2.0.0-alpha-2
This is an automated email from the ASF dual-hosted git repository.
ctubbsii pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/accumulo-wikisearch.git
The following commit(s) were added to refs/heads/master by this push:
new 7667f22 Build against Accumulo 2.0.0-alpha-2
7667f22 is described below
commit 7667f22431e71d23c6e8b0b79787c39a29f252f0
Author: Christopher Tubbs <ct...@apache.org>
AuthorDate: Wed Jun 5 16:59:06 2019 -0400
Build against Accumulo 2.0.0-alpha-2
---
NOTICE | 2 +-
ingest/pom.xml | 3 +-
.../wikisearch/ingest/WikipediaConfiguration.java | 89 +--
.../wikisearch/ingest/WikipediaIngester.java | 101 +--
.../wikisearch/ingest/WikipediaMapper.java | 110 ++--
.../ingest/WikipediaPartitionedIngester.java | 172 +++---
.../wikisearch/reader/AggregatingRecordReader.java | 66 +-
.../examples/wikisearch/reader/LfLineReader.java | 67 +-
.../examples/wikisearch/util/TextUtil.java | 32 +-
.../wikisearch/ingest/WikipediaInputSplitTest.java | 13 +-
.../wikisearch/iterator/TextIndexTest.java | 81 ++-
pom.xml | 201 +++---
query-war/pom.xml | 12 +-
query/pom.xml | 29 +-
.../iterator/AbstractEvaluatingIterator.java | 180 +++---
.../examples/wikisearch/iterator/AndIterator.java | 422 +++++++------
.../wikisearch/iterator/BooleanLogicIterator.java | 674 ++++++++++++---------
.../iterator/DefaultIteratorEnvironment.java | 21 +-
.../wikisearch/iterator/EvaluatingIterator.java | 40 +-
.../examples/wikisearch/iterator/OrIterator.java | 378 ++++++------
.../wikisearch/logic/AbstractQueryLogic.java | 411 +++++++------
.../wikisearch/parser/FieldIndexQueryReWriter.java | 464 +++++++-------
.../examples/wikisearch/parser/QueryEvaluator.java | 110 ++--
.../wikisearch/parser/RangeCalculator.java | 640 +++++++++++--------
.../examples/wikisearch/parser/TreeBuilder.java | 278 +++++----
.../accumulo/examples/wikisearch/query/Query.java | 114 ++--
.../examples/wikisearch/util/BaseKeyParser.java | 33 +-
27 files changed, 2566 insertions(+), 2177 deletions(-)
diff --git a/NOTICE b/NOTICE
index 8a2d875..b58fdab 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,5 +1,5 @@
Apache Accumulo Wikisearch
-Copyright 2011-2017 The Apache Software Foundation
+Copyright 2011-2019 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
diff --git a/ingest/pom.xml b/ingest/pom.xml
index 426cff2..a317f5b 100644
--- a/ingest/pom.xml
+++ b/ingest/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>org.apache.accumulo</groupId>
<artifactId>accumulo-wikisearch</artifactId>
- <version>1.8.0</version>
+ <version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>wikisearch-ingest</artifactId>
<name>wikisearch-ingest</name>
@@ -102,7 +102,6 @@
<descriptors>
<descriptor>src/assembly/dist.xml</descriptor>
</descriptors>
- <tarLongFileMode>gnu</tarLongFileMode>
</configuration>
</plugin>
</plugins>
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java
index 27a28a1..44a3fbc 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java
@@ -35,21 +35,21 @@ public class WikipediaConfiguration {
public final static String USER = "wikipedia.accumulo.user";
public final static String PASSWORD = "wikipedia.accumulo.password";
public final static String TABLE_NAME = "wikipedia.accumulo.table";
-
+
public final static String ZOOKEEPERS = "wikipedia.accumulo.zookeepers";
-
+
public final static String NAMESPACES_FILENAME = "wikipedia.namespaces.filename";
public final static String LANGUAGES_FILENAME = "wikipedia.languages.filename";
public final static String WORKING_DIRECTORY = "wikipedia.ingest.working";
-
+
public final static String ANALYZER = "wikipedia.index.analyzer";
-
+
public final static String NUM_PARTITIONS = "wikipedia.ingest.partitions";
public final static String NUM_GROUPS = "wikipedia.ingest.groups";
public final static String PARTITIONED_ARTICLES_DIRECTORY = "wikipedia.partitioned.directory";
-
+
public final static String RUN_PARTITIONER = "wikipedia.run.partitioner";
public final static String RUN_INGEST = "wikipedia.run.ingest";
public final static String BULK_INGEST = "wikipedia.bulk.ingest";
@@ -57,12 +57,11 @@ public class WikipediaConfiguration {
public final static String BULK_INGEST_FAILURE_DIR = "wikipedia.bulk.ingest.failure.dir";
public final static String BULK_INGEST_BUFFER_SIZE = "wikipedia.bulk.ingest.buffer.size";
public final static String PARTITIONED_INPUT_MIN_SPLIT_SIZE = "wikipedia.min.input.split.size";
-
-
+
public static String getUser(Configuration conf) {
return conf.get(USER);
- };
-
+ }
+
public static byte[] getPassword(Configuration conf) {
String pass = conf.get(PASSWORD);
if (pass == null) {
@@ -70,7 +69,7 @@ public class WikipediaConfiguration {
}
return pass.getBytes();
}
-
+
public static String getTableName(Configuration conf) {
String tablename = conf.get(TABLE_NAME);
if (tablename == null) {
@@ -78,11 +77,11 @@ public class WikipediaConfiguration {
}
return tablename;
}
-
+
public static String getInstanceName(Configuration conf) {
return conf.get(INSTANCE_NAME);
}
-
+
public static String getZookeepers(Configuration conf) {
String zookeepers = conf.get(ZOOKEEPERS);
if (zookeepers == null) {
@@ -90,47 +89,51 @@ public class WikipediaConfiguration {
}
return zookeepers;
}
-
+
public static Path getNamespacesFile(Configuration conf) {
- String filename = conf.get(NAMESPACES_FILENAME, new Path(getWorkingDirectory(conf), "namespaces.dat").toString());
+ String filename = conf.get(NAMESPACES_FILENAME,
+ new Path(getWorkingDirectory(conf), "namespaces.dat").toString());
return new Path(filename);
}
-
+
public static Path getLanguagesFile(Configuration conf) {
- String filename = conf.get(LANGUAGES_FILENAME, new Path(getWorkingDirectory(conf), "languages.txt").toString());
+ String filename = conf.get(LANGUAGES_FILENAME,
+ new Path(getWorkingDirectory(conf), "languages.txt").toString());
return new Path(filename);
}
-
+
public static Path getWorkingDirectory(Configuration conf) {
String filename = conf.get(WORKING_DIRECTORY);
return new Path(filename);
}
-
+
public static Analyzer getAnalyzer(Configuration conf) throws IOException {
- Class<? extends Analyzer> analyzerClass = conf.getClass(ANALYZER, SimpleAnalyzer.class, Analyzer.class);
+ Class<? extends Analyzer> analyzerClass =
+ conf.getClass(ANALYZER, SimpleAnalyzer.class, Analyzer.class);
return ReflectionUtils.newInstance(analyzerClass, conf);
}
-
- public static Connector getConnector(Configuration conf) throws AccumuloException, AccumuloSecurityException {
+
+ public static Connector getConnector(Configuration conf)
+ throws AccumuloException, AccumuloSecurityException {
return getInstance(conf).getConnector(getUser(conf), getPassword(conf));
}
-
+
public static Instance getInstance(Configuration conf) {
return new ZooKeeperInstance(getInstanceName(conf), getZookeepers(conf));
}
-
+
public static int getNumPartitions(Configuration conf) {
return conf.getInt(NUM_PARTITIONS, 25);
}
-
+
public static int getNumGroups(Configuration conf) {
return conf.getInt(NUM_GROUPS, 1);
}
-
+
public static Path getPartitionedArticlesPath(Configuration conf) {
return new Path(conf.get(PARTITIONED_ARTICLES_DIRECTORY));
}
-
+
public static long getMinInputSplitSize(Configuration conf) {
return conf.getLong(PARTITIONED_INPUT_MIN_SPLIT_SIZE, 1l << 27);
}
@@ -154,18 +157,14 @@ public class WikipediaConfiguration {
public static String bulkIngestFailureDir(Configuration conf) {
return conf.get(BULK_INGEST_FAILURE_DIR);
}
-
+
public static long bulkIngestBufferSize(Configuration conf) {
- return conf.getLong(BULK_INGEST_BUFFER_SIZE,1l<<28);
+ return conf.getLong(BULK_INGEST_BUFFER_SIZE, 1l << 28);
}
/**
* Helper method to get properties from Hadoop configuration
- *
- * @param <T>
- * @param conf
- * @param propertyName
- * @param resultClass
+ *
* @throws IllegalArgumentException
* if property is not defined, null, or empty. Or if resultClass is not handled.
* @return value of property
@@ -173,26 +172,28 @@ public class WikipediaConfiguration {
@SuppressWarnings("unchecked")
public static <T> T isNull(Configuration conf, String propertyName, Class<T> resultClass) {
String p = conf.get(propertyName);
- if (StringUtils.isEmpty(p))
+ if (StringUtils.isEmpty(p)) {
throw new IllegalArgumentException(propertyName + " must be specified");
-
- if (resultClass.equals(String.class))
+ }
+
+ if (resultClass.equals(String.class)) {
return (T) p;
- else if (resultClass.equals(String[].class))
+ } else if (resultClass.equals(String[].class)) {
return (T) conf.getStrings(propertyName);
- else if (resultClass.equals(Boolean.class))
+ } else if (resultClass.equals(Boolean.class)) {
return (T) Boolean.valueOf(p);
- else if (resultClass.equals(Long.class))
+ } else if (resultClass.equals(Long.class)) {
return (T) Long.valueOf(p);
- else if (resultClass.equals(Integer.class))
+ } else if (resultClass.equals(Integer.class)) {
return (T) Integer.valueOf(p);
- else if (resultClass.equals(Float.class))
+ } else if (resultClass.equals(Float.class)) {
return (T) Float.valueOf(p);
- else if (resultClass.equals(Double.class))
+ } else if (resultClass.equals(Double.class)) {
return (T) Double.valueOf(p);
- else
+ } else {
throw new IllegalArgumentException(resultClass.getSimpleName() + " is unhandled.");
-
+ }
+
}
}
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
index 1a495ed..4be65d5 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
@@ -30,12 +30,12 @@ import java.util.regex.Pattern;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.ClientConfiguration;
+import org.apache.accumulo.core.client.ClientConfiguration.ClientProperty;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.IteratorSetting.Column;
import org.apache.accumulo.core.client.TableExistsException;
import org.apache.accumulo.core.client.TableNotFoundException;
-import org.apache.accumulo.core.client.ClientConfiguration.ClientProperty;
import org.apache.accumulo.core.client.admin.TableOperations;
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
@@ -59,68 +59,73 @@ import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WikipediaIngester extends Configured implements Tool {
-
+
public final static String INGEST_LANGUAGE = "wikipedia.ingest_language";
public final static String SPLIT_FILE = "wikipedia.split_file";
public final static String TABLE_NAME = "wikipedia.table";
-
+
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), new WikipediaIngester(), args);
System.exit(res);
}
-
- public static void createTables(TableOperations tops, String tableName, boolean configureLocalityGroups) throws AccumuloException, AccumuloSecurityException,
+
+ public static void createTables(TableOperations tops, String tableName,
+ boolean configureLocalityGroups) throws AccumuloException, AccumuloSecurityException,
TableNotFoundException, TableExistsException {
// Create the shard table
String indexTableName = tableName + "Index";
String reverseIndexTableName = tableName + "ReverseIndex";
String metadataTableName = tableName + "Metadata";
-
+
// create the shard table
if (!tops.exists(tableName)) {
- // Set a text index combiner on the given field names. No combiner is set if the option is not supplied
+ // Set a text index combiner on the given field names. No combiner is set if the option is not
+ // supplied
String textIndexFamilies = WikipediaMapper.TOKENS_FIELD_NAME;
-
+
tops.create(tableName);
if (textIndexFamilies.length() > 0) {
System.out.println("Adding content combiner on the fields: " + textIndexFamilies);
-
+
IteratorSetting setting = new IteratorSetting(10, TextIndexCombiner.class);
- List<Column> columns = new ArrayList<Column>();
+ List<Column> columns = new ArrayList<>();
for (String family : StringUtils.split(textIndexFamilies, ',')) {
columns.add(new Column("fi\0" + family));
}
TextIndexCombiner.setColumns(setting, columns);
TextIndexCombiner.setLossyness(setting, true);
-
+
tops.attachIterator(tableName, setting, EnumSet.allOf(IteratorScope.class));
}
-
+
// Set the locality group for the full content column family
- if (configureLocalityGroups)
- tops.setLocalityGroups(tableName,
- Collections.singletonMap("WikipediaDocuments", Collections.singleton(new Text(WikipediaMapper.DOCUMENT_COLUMN_FAMILY))));
-
+ if (configureLocalityGroups) {
+ tops.setLocalityGroups(tableName, Collections.singletonMap("WikipediaDocuments",
+ Collections.singleton(new Text(WikipediaMapper.DOCUMENT_COLUMN_FAMILY))));
+ }
+
}
-
+
if (!tops.exists(indexTableName)) {
tops.create(indexTableName);
// Add the UID combiner
- IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class);
+ IteratorSetting setting =
+ new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class);
GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
GlobalIndexUidCombiner.setLossyness(setting, true);
tops.attachIterator(indexTableName, setting, EnumSet.allOf(IteratorScope.class));
}
-
+
if (!tops.exists(reverseIndexTableName)) {
tops.create(reverseIndexTableName);
// Add the UID combiner
- IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class);
+ IteratorSetting setting =
+ new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class);
GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
GlobalIndexUidCombiner.setLossyness(setting, true);
tops.attachIterator(reverseIndexTableName, setting, EnumSet.allOf(IteratorScope.class));
}
-
+
if (!tops.exists(metadataTableName)) {
// Add the SummingCombiner with VARLEN encoding for the frequency column
tops.create(metadataTableName);
@@ -130,42 +135,44 @@ public class WikipediaIngester extends Configured implements Tool {
tops.attachIterator(metadataTableName, setting, EnumSet.allOf(IteratorScope.class));
}
}
-
+
@Override
public int run(String[] args) throws Exception {
Job job = new Job(getConf(), "Ingest Wikipedia");
Configuration conf = job.getConfiguration();
conf.set("mapred.map.tasks.speculative.execution", "false");
-
+
String tablename = WikipediaConfiguration.getTableName(conf);
- ClientConfiguration clientConfig = new ClientConfiguration();
- clientConfig.setProperty(ClientProperty.INSTANCE_NAME, WikipediaConfiguration.getInstanceName(conf));
- clientConfig.setProperty(ClientProperty.INSTANCE_ZK_HOST, WikipediaConfiguration.getZookeepers(conf));
-
+ ClientConfiguration clientConfig = ClientConfiguration.create();
+ clientConfig.setProperty(ClientProperty.INSTANCE_NAME,
+ WikipediaConfiguration.getInstanceName(conf));
+ clientConfig.setProperty(ClientProperty.INSTANCE_ZK_HOST,
+ WikipediaConfiguration.getZookeepers(conf));
+
String user = WikipediaConfiguration.getUser(conf);
byte[] password = WikipediaConfiguration.getPassword(conf);
Connector connector = WikipediaConfiguration.getConnector(conf);
-
+
TableOperations tops = connector.tableOperations();
-
+
createTables(tops, tablename, true);
-
+
configureJob(job);
-
- List<Path> inputPaths = new ArrayList<Path>();
- SortedSet<String> languages = new TreeSet<String>();
+
+ List<Path> inputPaths = new ArrayList<>();
+ SortedSet<String> languages = new TreeSet<>();
FileSystem fs = FileSystem.get(conf);
Path parent = new Path(conf.get("wikipedia.input"));
listFiles(parent, fs, inputPaths, languages);
-
+
System.out.println("Input files in " + parent + ":" + inputPaths.size());
Path[] inputPathsArray = new Path[inputPaths.size()];
inputPaths.toArray(inputPathsArray);
-
+
System.out.println("Languages:" + languages.size());
-
+
FileInputFormat.setInputPaths(job, inputPathsArray);
-
+
job.setMapperClass(WikipediaMapper.class);
job.setNumReduceTasks(0);
job.setMapOutputKeyClass(Text.class);
@@ -173,17 +180,12 @@ public class WikipediaIngester extends Configured implements Tool {
job.setOutputFormatClass(AccumuloOutputFormat.class);
AccumuloOutputFormat.setConnectorInfo(job, user, new PasswordToken(password));
AccumuloOutputFormat.setZooKeeperInstance(job, clientConfig);
-
+
return job.waitForCompletion(true) ? 0 : 1;
}
-
- public final static PathFilter partFilter = new PathFilter() {
- @Override
- public boolean accept(Path path) {
- return path.getName().startsWith("part");
- };
- };
-
+
+ public final static PathFilter partFilter = path -> path.getName().startsWith("part");
+
protected void configureJob(Job job) {
Configuration conf = job.getConfiguration();
job.setJarByClass(WikipediaIngester.class);
@@ -191,10 +193,11 @@ public class WikipediaIngester extends Configured implements Tool {
conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
}
-
+
protected static final Pattern filePattern = Pattern.compile("([a-z_]+).*.xml(.bz2)?");
-
- protected void listFiles(Path path, FileSystem fs, List<Path> files, Set<String> languages) throws IOException {
+
+ protected void listFiles(Path path, FileSystem fs, List<Path> files, Set<String> languages)
+ throws IOException {
for (FileStatus status : fs.listStatus(path)) {
if (status.isDir()) {
listFiles(status.getPath(), fs, files, languages);
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
index 8565b09..c751637 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java
@@ -15,11 +15,10 @@
* limitations under the License.
*/
/**
- *
+ *
*/
package org.apache.accumulo.examples.wikisearch.ingest;
-
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
@@ -55,19 +54,19 @@ import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
public class WikipediaMapper extends Mapper<LongWritable,Text,Text,Mutation> {
-
+
private static final Logger log = Logger.getLogger(WikipediaMapper.class);
-
+
public final static Charset UTF8 = Charset.forName("UTF-8");
public static final String DOCUMENT_COLUMN_FAMILY = "d";
public static final String METADATA_EVENT_COLUMN_FAMILY = "e";
public static final String METADATA_INDEX_COLUMN_FAMILY = "i";
public static final String TOKENS_FIELD_NAME = "TEXT";
-
+
private final static Pattern languagePattern = Pattern.compile("([a-z_]+).*.xml(.bz2)?");
private static final Value NULL_VALUE = new Value(new byte[0]);
private static final String cvPrefix = "all|";
-
+
private ArticleExtractor extractor;
private String language;
private int numPartitions = 0;
@@ -75,12 +74,12 @@ public class WikipediaMapper extends Mapper<LongWritable,Text,Text,Mutation> {
private int myGroup = -1;
private int numGroups = -1;
-
+
private Text tablename = null;
private Text indexTableName = null;
private Text reverseIndexTableName = null;
private Text metadataTableName = null;
-
+
@Override
public void setup(Context context) {
Configuration conf = context.getConfiguration();
@@ -88,11 +87,11 @@ public class WikipediaMapper extends Mapper<LongWritable,Text,Text,Mutation> {
indexTableName = new Text(tablename + "Index");
reverseIndexTableName = new Text(tablename + "ReverseIndex");
metadataTableName = new Text(tablename + "Metadata");
-
- WikipediaInputSplit wiSplit = (WikipediaInputSplit)context.getInputSplit();
+
+ WikipediaInputSplit wiSplit = (WikipediaInputSplit) context.getInputSplit();
myGroup = wiSplit.getPartition();
numGroups = WikipediaConfiguration.getNumGroups(conf);
-
+
FileSplit split = wiSplit.getFileSplit();
String fileName = split.getPath().getName();
Matcher matcher = languagePattern.matcher(fileName);
@@ -104,40 +103,44 @@ public class WikipediaMapper extends Mapper<LongWritable,Text,Text,Mutation> {
extractor = new ArticleExtractor();
numPartitions = WikipediaConfiguration.getNumPartitions(conf);
cv = new ColumnVisibility(cvPrefix + language);
-
+
}
-
+
/**
* We will partition the documents based on the document id
- *
- * @param article
- * @param numPartitions
+ *
* @return The number of the partition for a given article.
- * @throws IllegalFormatException
*/
- public static int getPartitionId(Article article, int numPartitions) throws IllegalFormatException {
+ public static int getPartitionId(Article article, int numPartitions)
+ throws IllegalFormatException {
return article.getId() % numPartitions;
}
-
- static HashSet<String> metadataSent = new HashSet<String>();
+
+ static HashSet<String> metadataSent = new HashSet<>();
@Override
- protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
- Article article = extractor.extract(new InputStreamReader(new ByteArrayInputStream(value.getBytes()), UTF8));
+ protected void map(LongWritable key, Text value, Context context)
+ throws IOException, InterruptedException {
+ Article article =
+ extractor.extract(new InputStreamReader(new ByteArrayInputStream(value.getBytes()), UTF8));
String NULL_BYTE = "\u0000";
String colfPrefix = language + NULL_BYTE;
String indexPrefix = "fi" + NULL_BYTE;
if (article != null) {
int groupId = WikipediaMapper.getPartitionId(article, numGroups);
- if(groupId != myGroup)
+ if (groupId != myGroup) {
return;
- Text partitionId = new Text(Integer.toString(WikipediaMapper.getPartitionId(article, numPartitions)));
-
+ }
+ Text partitionId =
+ new Text(Integer.toString(WikipediaMapper.getPartitionId(article, numPartitions)));
+
// Create the mutations for the document.
// Row is partition id, colf is language0articleid, colq is fieldName\0fieldValue
Mutation m = new Mutation(partitionId);
for (Entry<String,Object> entry : article.getFieldValues().entrySet()) {
- m.put(colfPrefix + article.getId(), entry.getKey() + NULL_BYTE + entry.getValue().toString(), cv, article.getTimestamp(), NULL_VALUE);
+ m.put(colfPrefix + article.getId(),
+ entry.getKey() + NULL_BYTE + entry.getValue().toString(), cv, article.getTimestamp(),
+ NULL_VALUE);
// Create mutations for the metadata table.
String metadataKey = entry.getKey() + METADATA_EVENT_COLUMN_FAMILY + language;
if (!metadataSent.contains(metadataKey)) {
@@ -147,26 +150,30 @@ public class WikipediaMapper extends Mapper<LongWritable,Text,Text,Mutation> {
metadataSent.add(metadataKey);
}
}
-
+
// Tokenize the content
Set<String> tokens = getTokens(article);
-
+
// We are going to put the fields to be indexed into a multimap. This allows us to iterate
// over the entire set once.
Multimap<String,String> indexFields = HashMultimap.create();
// Add the normalized field values
LcNoDiacriticsNormalizer normalizer = new LcNoDiacriticsNormalizer();
- for (Entry<String,String> index : article.getNormalizedFieldValues().entrySet())
+ for (Entry<String,String> index : article.getNormalizedFieldValues().entrySet()) {
indexFields.put(index.getKey(), index.getValue());
+ }
// Add the tokens
- for (String token : tokens)
+ for (String token : tokens) {
indexFields.put(TOKENS_FIELD_NAME, normalizer.normalizeFieldValue("", token));
-
+ }
+
for (Entry<String,String> index : indexFields.entries()) {
// Create mutations for the in partition index
// Row is partition id, colf is 'fi'\0fieldName, colq is fieldValue\0language\0article id
- m.put(indexPrefix + index.getKey(), index.getValue() + NULL_BYTE + colfPrefix + article.getId(), cv, article.getTimestamp(), NULL_VALUE);
-
+ m.put(indexPrefix + index.getKey(),
+ index.getValue() + NULL_BYTE + colfPrefix + article.getId(), cv, article.getTimestamp(),
+ NULL_VALUE);
+
// Create mutations for the global index
// Create a UID object for the Value
Builder uidBuilder = Uid.List.newBuilder();
@@ -175,54 +182,57 @@ public class WikipediaMapper extends Mapper<LongWritable,Text,Text,Mutation> {
uidBuilder.addUID(Integer.toString(article.getId()));
Uid.List uidList = uidBuilder.build();
Value val = new Value(uidList.toByteArray());
-
+
// Create mutations for the global index
- // Row is field value, colf is field name, colq is partitionid\0language, value is Uid.List object
+ // Row is field value, colf is field name, colq is partitionid\0language, value is Uid.List
+ // object
Mutation gm = new Mutation(index.getValue());
gm.put(index.getKey(), partitionId + NULL_BYTE + language, cv, article.getTimestamp(), val);
context.write(indexTableName, gm);
-
+
// Create mutations for the global reverse index
Mutation grm = new Mutation(StringUtils.reverse(index.getValue()));
- grm.put(index.getKey(), partitionId + NULL_BYTE + language, cv, article.getTimestamp(), val);
+ grm.put(index.getKey(), partitionId + NULL_BYTE + language, cv, article.getTimestamp(),
+ val);
context.write(reverseIndexTableName, grm);
-
+
// Create mutations for the metadata table.
String metadataKey = index.getKey() + METADATA_INDEX_COLUMN_FAMILY + language;
if (!metadataSent.contains(metadataKey)) {
Mutation mm = new Mutation(index.getKey());
- mm.put(METADATA_INDEX_COLUMN_FAMILY, language + NULL_BYTE + LcNoDiacriticsNormalizer.class.getName(), cv, article.getTimestamp(), NULL_VALUE);
+ mm.put(METADATA_INDEX_COLUMN_FAMILY,
+ language + NULL_BYTE + LcNoDiacriticsNormalizer.class.getName(), cv,
+ article.getTimestamp(), NULL_VALUE);
context.write(metadataTableName, mm);
metadataSent.add(metadataKey);
}
}
// Add the entire text to the document section of the table.
- // row is the partition, colf is 'd', colq is language\0articleid, value is Base64 encoded GZIP'd document
- m.put(DOCUMENT_COLUMN_FAMILY, colfPrefix + article.getId(), cv, article.getTimestamp(), new Value(Base64.encodeBase64(article.getText().getBytes())));
+ // row is the partition, colf is 'd', colq is language\0articleid, value is Base64 encoded
+ // GZIP'd document
+ m.put(DOCUMENT_COLUMN_FAMILY, colfPrefix + article.getId(), cv, article.getTimestamp(),
+ new Value(Base64.encodeBase64(article.getText().getBytes())));
context.write(tablename, m);
-
+
} else {
context.getCounter("wikipedia", "invalid articles").increment(1);
}
context.progress();
}
-
+
/**
* Tokenize the wikipedia content
- *
- * @param article
- * @return
- * @throws IOException
*/
static Set<String> getTokens(Article article) throws IOException {
- Set<String> tokenList = new HashSet<String>();
+ Set<String> tokenList = new HashSet<>();
WikipediaTokenizer tok = new WikipediaTokenizer(new StringReader(article.getText()));
TermAttribute term = tok.addAttribute(TermAttribute.class);
try {
while (tok.incrementToken()) {
String token = term.term();
- if (!StringUtils.isEmpty(token))
+ if (!StringUtils.isEmpty(token)) {
tokenList.add(token);
+ }
}
} catch (IOException e) {
log.error("Error tokenizing text", e);
@@ -241,5 +251,5 @@ public class WikipediaMapper extends Mapper<LongWritable,Text,Text,Mutation> {
}
return tokenList;
}
-
+
}
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
index 841f169..e9248a8 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
@@ -30,12 +30,12 @@ import java.util.regex.Pattern;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.ClientConfiguration;
+import org.apache.accumulo.core.client.ClientConfiguration.ClientProperty;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.IteratorSetting.Column;
import org.apache.accumulo.core.client.TableExistsException;
import org.apache.accumulo.core.client.TableNotFoundException;
-import org.apache.accumulo.core.client.ClientConfiguration.ClientProperty;
import org.apache.accumulo.core.client.admin.TableOperations;
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
@@ -70,62 +70,66 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
public final static String INGEST_LANGUAGE = "wikipedia.ingest_language";
public final static String SPLIT_FILE = "wikipedia.split_file";
public final static String TABLE_NAME = "wikipedia.table";
-
+
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), new WikipediaPartitionedIngester(), args);
System.exit(res);
}
-
- private void createTables(TableOperations tops, String tableName) throws AccumuloException, AccumuloSecurityException, TableNotFoundException,
- TableExistsException {
+
+ private void createTables(TableOperations tops, String tableName) throws AccumuloException,
+ AccumuloSecurityException, TableNotFoundException, TableExistsException {
// Create the shard table
String indexTableName = tableName + "Index";
String reverseIndexTableName = tableName + "ReverseIndex";
String metadataTableName = tableName + "Metadata";
-
+
// create the shard table
if (!tops.exists(tableName)) {
- // Set a text index combiner on the given field names. No combiner is set if the option is not supplied
+ // Set a text index combiner on the given field names. No combiner is set if the option is not
+ // supplied
String textIndexFamilies = WikipediaMapper.TOKENS_FIELD_NAME;
-
+
tops.create(tableName);
if (textIndexFamilies.length() > 0) {
System.out.println("Adding content combiner on the fields: " + textIndexFamilies);
-
+
IteratorSetting setting = new IteratorSetting(10, TextIndexCombiner.class);
- List<Column> columns = new ArrayList<Column>();
+ List<Column> columns = new ArrayList<>();
for (String family : StringUtils.split(textIndexFamilies, ',')) {
columns.add(new Column("fi\0" + family));
}
TextIndexCombiner.setColumns(setting, columns);
TextIndexCombiner.setLossyness(setting, true);
-
+
tops.attachIterator(tableName, setting, EnumSet.allOf(IteratorScope.class));
}
-
+
// Set the locality group for the full content column family
- tops.setLocalityGroups(tableName, Collections.singletonMap("WikipediaDocuments", Collections.singleton(new Text(WikipediaMapper.DOCUMENT_COLUMN_FAMILY))));
-
+ tops.setLocalityGroups(tableName, Collections.singletonMap("WikipediaDocuments",
+ Collections.singleton(new Text(WikipediaMapper.DOCUMENT_COLUMN_FAMILY))));
+
}
-
+
if (!tops.exists(indexTableName)) {
tops.create(indexTableName);
// Add the UID combiner
- IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class);
+ IteratorSetting setting =
+ new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class);
GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
GlobalIndexUidCombiner.setLossyness(setting, true);
tops.attachIterator(indexTableName, setting, EnumSet.allOf(IteratorScope.class));
}
-
+
if (!tops.exists(reverseIndexTableName)) {
tops.create(reverseIndexTableName);
// Add the UID combiner
- IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class);
+ IteratorSetting setting =
+ new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class);
GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
GlobalIndexUidCombiner.setLossyness(setting, true);
tops.attachIterator(reverseIndexTableName, setting, EnumSet.allOf(IteratorScope.class));
}
-
+
if (!tops.exists(metadataTableName)) {
// Add the SummingCombiner with VARLEN encoding for the frequency column
tops.create(metadataTableName);
@@ -135,51 +139,51 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
tops.attachIterator(metadataTableName, setting, EnumSet.allOf(IteratorScope.class));
}
}
-
+
@Override
public int run(String[] args) throws Exception {
Configuration conf = getConf();
- if(WikipediaConfiguration.runPartitioner(conf))
- {
+ if (WikipediaConfiguration.runPartitioner(conf)) {
int result = runPartitionerJob();
- if(result != 0)
+ if (result != 0) {
return result;
+ }
}
- if(WikipediaConfiguration.runIngest(conf))
- {
+ if (WikipediaConfiguration.runIngest(conf)) {
int result = runIngestJob();
- if(result != 0)
+ if (result != 0) {
return result;
- if(WikipediaConfiguration.bulkIngest(conf))
+ }
+ if (WikipediaConfiguration.bulkIngest(conf)) {
return loadBulkFiles();
+ }
}
return 0;
}
-
- private int runPartitionerJob() throws Exception
- {
+
+ private int runPartitionerJob() throws Exception {
Job partitionerJob = new Job(getConf(), "Partition Wikipedia");
Configuration partitionerConf = partitionerJob.getConfiguration();
partitionerConf.set("mapred.map.tasks.speculative.execution", "false");
configurePartitionerJob(partitionerJob);
-
- List<Path> inputPaths = new ArrayList<Path>();
- SortedSet<String> languages = new TreeSet<String>();
+
+ List<Path> inputPaths = new ArrayList<>();
+ SortedSet<String> languages = new TreeSet<>();
FileSystem fs = FileSystem.get(partitionerConf);
Path parent = new Path(partitionerConf.get("wikipedia.input"));
listFiles(parent, fs, inputPaths, languages);
-
+
System.out.println("Input files in " + parent + ":" + inputPaths.size());
Path[] inputPathsArray = new Path[inputPaths.size()];
inputPaths.toArray(inputPathsArray);
-
+
System.out.println("Languages:" + languages.size());
// setup input format
-
+
WikipediaInputFormat.setInputPaths(partitionerJob, inputPathsArray);
-
+
partitionerJob.setMapperClass(WikipediaPartitioner.class);
partitionerJob.setNumReduceTasks(0);
@@ -193,95 +197,94 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
SequenceFileOutputFormat.setOutputPath(partitionerJob, outputDir);
SequenceFileOutputFormat.setCompressOutput(partitionerJob, true);
SequenceFileOutputFormat.setOutputCompressionType(partitionerJob, CompressionType.RECORD);
-
+
return partitionerJob.waitForCompletion(true) ? 0 : 1;
}
-
- private int runIngestJob() throws Exception
- {
- Job ingestJob = new Job(getConf(), "Ingest Partitioned Wikipedia");
+
+ private int runIngestJob() throws Exception {
+ Job ingestJob = Job.getInstance(getConf(), "Ingest Partitioned Wikipedia");
Configuration ingestConf = ingestJob.getConfiguration();
ingestConf.set("mapred.map.tasks.speculative.execution", "false");
configureIngestJob(ingestJob);
-
+
String tablename = WikipediaConfiguration.getTableName(ingestConf);
-
+
Connector connector = WikipediaConfiguration.getConnector(ingestConf);
-
+
TableOperations tops = connector.tableOperations();
-
+
createTables(tops, tablename);
-
+
ingestJob.setMapperClass(WikipediaPartitionedMapper.class);
ingestJob.setNumReduceTasks(0);
-
+
// setup input format
ingestJob.setInputFormatClass(SequenceFileInputFormat.class);
- SequenceFileInputFormat.setInputPaths(ingestJob, WikipediaConfiguration.getPartitionedArticlesPath(ingestConf));
+ SequenceFileInputFormat.setInputPaths(ingestJob,
+ WikipediaConfiguration.getPartitionedArticlesPath(ingestConf));
// TODO make split size configurable
- SequenceFileInputFormat.setMinInputSplitSize(ingestJob, WikipediaConfiguration.getMinInputSplitSize(ingestConf));
+ SequenceFileInputFormat.setMinInputSplitSize(ingestJob,
+ WikipediaConfiguration.getMinInputSplitSize(ingestConf));
// setup output format
ingestJob.setMapOutputKeyClass(Text.class);
ingestJob.setMapOutputValueClass(Mutation.class);
-
- if(WikipediaConfiguration.bulkIngest(ingestConf))
- {
+
+ if (WikipediaConfiguration.bulkIngest(ingestConf)) {
ingestJob.setOutputFormatClass(SortingRFileOutputFormat.class);
- SortingRFileOutputFormat.setMaxBufferSize(ingestConf, WikipediaConfiguration.bulkIngestBufferSize(ingestConf));
+ SortingRFileOutputFormat.setMaxBufferSize(ingestConf,
+ WikipediaConfiguration.bulkIngestBufferSize(ingestConf));
String bulkIngestDir = WikipediaConfiguration.bulkIngestDir(ingestConf);
- if(bulkIngestDir == null)
- {
+ if (bulkIngestDir == null) {
log.error("Bulk ingest dir not set");
return 1;
}
- SortingRFileOutputFormat.setPathName(ingestConf, WikipediaConfiguration.bulkIngestDir(ingestConf));
+ SortingRFileOutputFormat.setPathName(ingestConf,
+ WikipediaConfiguration.bulkIngestDir(ingestConf));
} else {
ingestJob.setOutputFormatClass(AccumuloOutputFormat.class);
- ClientConfiguration clientConfig = new ClientConfiguration();
- clientConfig.setProperty(ClientProperty.INSTANCE_NAME, WikipediaConfiguration.getInstanceName(ingestConf));
- clientConfig.setProperty(ClientProperty.INSTANCE_ZK_HOST, WikipediaConfiguration.getZookeepers(ingestConf));
+ ClientConfiguration clientConfig = ClientConfiguration.create();
+ clientConfig.setProperty(ClientProperty.INSTANCE_NAME,
+ WikipediaConfiguration.getInstanceName(ingestConf));
+ clientConfig.setProperty(ClientProperty.INSTANCE_ZK_HOST,
+ WikipediaConfiguration.getZookeepers(ingestConf));
String user = WikipediaConfiguration.getUser(ingestConf);
byte[] password = WikipediaConfiguration.getPassword(ingestConf);
AccumuloOutputFormat.setConnectorInfo(ingestJob, user, new PasswordToken(password));
AccumuloOutputFormat.setZooKeeperInstance(ingestJob, clientConfig);
}
-
+
return ingestJob.waitForCompletion(true) ? 0 : 1;
}
-
- private int loadBulkFiles() throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException
- {
+
+ private int loadBulkFiles()
+ throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException {
Configuration conf = getConf();
Connector connector = WikipediaConfiguration.getConnector(conf);
-
+
FileSystem fs = FileSystem.get(conf);
String directory = WikipediaConfiguration.bulkIngestDir(conf);
-
+
String failureDirectory = WikipediaConfiguration.bulkIngestFailureDir(conf);
-
- for(FileStatus status: fs.listStatus(new Path(directory)))
- {
- if(status.isDir() == false)
+
+ for (FileStatus status : fs.listStatus(new Path(directory))) {
+ if (status.isDir() == false) {
continue;
+ }
Path dir = status.getPath();
- Path failPath = new Path(failureDirectory+"/"+dir.getName());
+ Path failPath = new Path(failureDirectory + "/" + dir.getName());
fs.mkdirs(failPath);
- connector.tableOperations().importDirectory(dir.getName(), dir.toString(), failPath.toString(), true);
+ connector.tableOperations().importDirectory(dir.getName(), dir.toString(),
+ failPath.toString(), true);
}
-
+
return 0;
}
-
- public final static PathFilter partFilter = new PathFilter() {
- @Override
- public boolean accept(Path path) {
- return path.getName().startsWith("part");
- };
- };
-
+
+ public final static PathFilter partFilter = path -> path.getName().startsWith("part");
+
protected void configurePartitionerJob(Job job) {
Configuration conf = job.getConfiguration();
job.setJarByClass(WikipediaPartitionedIngester.class);
@@ -293,10 +296,11 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
protected void configureIngestJob(Job job) {
job.setJarByClass(WikipediaPartitionedIngester.class);
}
-
+
protected static final Pattern filePattern = Pattern.compile("([a-z_]+).*.xml(.bz2)?");
-
- protected void listFiles(Path path, FileSystem fs, List<Path> files, Set<String> languages) throws IOException {
+
+ protected void listFiles(Path path, FileSystem fs, List<Path> files, Set<String> languages)
+ throws IOException {
for (FileStatus status : fs.listStatus(path)) {
if (status.isDir()) {
listFiles(status.getPath(), fs, files, languages);
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java
index 09755c0..9eda61d 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java
@@ -16,7 +16,6 @@
*/
package org.apache.accumulo.examples.wikisearch.reader;
-
import java.io.IOException;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
@@ -27,18 +26,17 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
/**
- * This class aggregates Text values based on a start and end filter. An example use case for this would be XML data. This will not work with data that has
- * nested start and stop tokens.
- *
+ * This class aggregates Text values based on a start and end filter. An example use case for this
+ * would be XML data. This will not work with data that has nested start and stop tokens.
+ *
*/
public class AggregatingRecordReader extends LongLineRecordReader {
-
+
public static final String START_TOKEN = "aggregating.token.start";
public static final String END_TOKEN = "aggregating.token.end";
public static final String RETURN_PARTIAL_MATCHES = "aggregating.allow.partial";
-
+
private LongWritable key = new LongWritable();
private String startToken = null;
private String endToken = null;
@@ -47,35 +45,40 @@ public class AggregatingRecordReader extends LongLineRecordReader {
private boolean startFound = false;
private StringBuilder remainder = new StringBuilder(0);
private boolean returnPartialMatches = false;
-
+
@Override
public LongWritable getCurrentKey() {
key.set(counter);
return key;
}
-
+
@Override
public Text getCurrentValue() {
return aggValue;
}
-
+
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
- super.initialize(((WikipediaInputSplit)genericSplit).getFileSplit(), context);
- this.startToken = WikipediaConfiguration.isNull(context.getConfiguration(), START_TOKEN, String.class);
- this.endToken = WikipediaConfiguration.isNull(context.getConfiguration(), END_TOKEN, String.class);
- this.returnPartialMatches = context.getConfiguration().getBoolean(RETURN_PARTIAL_MATCHES, false);
-
+ super.initialize(((WikipediaInputSplit) genericSplit).getFileSplit(), context);
+ this.startToken =
+ WikipediaConfiguration.isNull(context.getConfiguration(), START_TOKEN, String.class);
+ this.endToken =
+ WikipediaConfiguration.isNull(context.getConfiguration(), END_TOKEN, String.class);
+ this.returnPartialMatches =
+ context.getConfiguration().getBoolean(RETURN_PARTIAL_MATCHES, false);
+
/*
- * Text-appending works almost exactly like the + operator on Strings- it creates a byte array exactly the size of [prefix + suffix] and dumps the bytes
- * into the new array. This module works by doing lots of little additions, one line at a time. With most XML, the documents are partitioned on line
- * boundaries, so we will generally have lots of additions. Setting a large default byte array for a text object can avoid this and give us
- * StringBuilder-like functionality for Text objects.
+ * Text-appending works almost exactly like the + operator on Strings- it creates a byte array
+ * exactly the size of [prefix + suffix] and dumps the bytes into the new array. This module
+ * works by doing lots of little additions, one line at a time. With most XML, the documents are
+ * partitioned on line boundaries, so we will generally have lots of additions. Setting a large
+ * default byte array for a text object can avoid this and give us StringBuilder-like
+ * functionality for Text objects.
*/
byte[] txtBuffer = new byte[2048];
aggValue.set(txtBuffer);
}
-
+
@Override
public boolean nextKeyValue() throws IOException {
aggValue.clear();
@@ -83,10 +86,11 @@ public class AggregatingRecordReader extends LongLineRecordReader {
boolean finished = false;
// Find the start token
while (!finished && (((hasNext = super.nextKeyValue()) == true) || remainder.length() > 0)) {
- if (hasNext)
+ if (hasNext) {
finished = process(super.getCurrentValue());
- else
+ } else {
finished = process(null);
+ }
if (finished) {
startFound = false;
counter++;
@@ -104,24 +108,25 @@ public class AggregatingRecordReader extends LongLineRecordReader {
}
return false;
}
-
+
/**
* Populates aggValue with the contents of the Text object.
- *
- * @param t
+ *
* @return true if aggValue is complete, else false and needs more data.
*/
private boolean process(Text t) {
-
- if (null != t)
+
+ if (null != t) {
remainder.append(t.toString());
+ }
while (remainder.length() > 0) {
if (!startFound) {
// If found, then begin aggregating at the start offset
int start = remainder.indexOf(startToken);
if (-1 != start) {
// Append the start token to the aggregate value
- TextUtil.textAppendNoNull(aggValue, remainder.substring(start, start + startToken.length()), false);
+ TextUtil.textAppendNoNull(aggValue,
+ remainder.substring(start, start + startToken.length()), false);
// Remove to the end of the start token from the remainder
remainder.delete(0, start + startToken.length());
startFound = true;
@@ -157,7 +162,8 @@ public class AggregatingRecordReader extends LongLineRecordReader {
return true;
} else {
// END_TOKEN was found. Extract to the end of END_TOKEN
- TextUtil.textAppendNoNull(aggValue, remainder.substring(0, end + endToken.length()), false);
+ TextUtil.textAppendNoNull(aggValue, remainder.substring(0, end + endToken.length()),
+ false);
// Remove from remainder up to the end of END_TOKEN
remainder.delete(0, end + endToken.length());
return true;
@@ -167,5 +173,5 @@ public class AggregatingRecordReader extends LongLineRecordReader {
}
return false;
}
-
+
}
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java
index a4da0ad..29ace56 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java
@@ -34,77 +34,78 @@ public class LfLineReader {
private int bufferLength = 0;
// the current position in the buffer
private int bufferPosn = 0;
-
+
private static final byte LF = '\n';
-
+
/**
* Create a line reader that reads from the given stream using the default buffer-size (64k).
- *
+ *
* @param in
* The input stream
- * @throws IOException
*/
public LfLineReader(InputStream in) {
this(in, DEFAULT_BUFFER_SIZE);
}
-
+
/**
* Create a line reader that reads from the given stream using the given buffer-size.
- *
+ *
* @param in
* The input stream
* @param bufferSize
* Size of the read buffer
- * @throws IOException
*/
public LfLineReader(InputStream in, int bufferSize) {
this.in = in;
this.bufferSize = bufferSize;
this.buffer = new byte[this.bufferSize];
}
-
+
/**
- * Create a line reader that reads from the given stream using the <code>io.file.buffer.size</code> specified in the given <code>Configuration</code>.
- *
+ * Create a line reader that reads from the given stream using the
+ * <code>io.file.buffer.size</code> specified in the given <code>Configuration</code>.
+ *
* @param in
* input stream
* @param conf
* configuration
- * @throws IOException
*/
public LfLineReader(InputStream in, Configuration conf) throws IOException {
this(in, conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE));
}
-
+
/**
* Close the underlying stream.
- *
- * @throws IOException
*/
public void close() throws IOException {
in.close();
}
-
+
/**
- * Read one line from the InputStream into the given Text. A line can be terminated by '\n' (LF). EOF also terminates an otherwise unterminated line.
- *
+ * Read one line from the InputStream into the given Text. A line can be terminated by '\n' (LF).
+ * EOF also terminates an otherwise unterminated line.
+ *
* @param str
* the object to store the given line (without newline)
* @param maxLineLength
- * the maximum number of bytes to store into str; the rest of the line is silently discarded.
+ * the maximum number of bytes to store into str; the rest of the line is silently
+ * discarded.
* @param maxBytesToConsume
- * the maximum number of bytes to consume in this call. This is only a hint, because if the line cross this threshold, we allow it to happen. It can
- * overshoot potentially by as much as one buffer length.
- *
+ * the maximum number of bytes to consume in this call. This is only a hint, because if
+ * the line crosses this threshold, we allow it to happen. It can overshoot potentially by
+ * as much as one buffer length.
+ *
* @return the number of bytes read including the (longest) newline found.
- *
+ *
* @throws IOException
* if the underlying stream throws
*/
public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
/*
- * We're reading data from in, but the head of the stream may be already buffered in buffer, so we have several cases: 1. No newline characters are in the
- * buffer, so we need to copy everything and read another buffer from the stream. 2. An unambiguously terminated line is in buffer, so we just copy to str.
+ * We're reading data from in, but the head of the stream may be already buffered in buffer, so
+ * we have several cases: 1. No newline characters are in the buffer, so we need to copy
+ * everything and read another buffer from the stream. 2. An unambiguously terminated line is in
+ * buffer, so we just copy to str.
*/
str.clear();
int txtLength = 0; // tracks str.getLength(), as an optimization
@@ -115,8 +116,9 @@ public class LfLineReader {
if (bufferPosn >= bufferLength) {
startPosn = bufferPosn = 0;
bufferLength = in.read(buffer);
- if (bufferLength <= 0)
+ if (bufferLength <= 0) {
break; // EOF
+ }
}
for (; bufferPosn < bufferLength; ++bufferPosn) { // search for newline
if (buffer[bufferPosn] == LF) {
@@ -136,15 +138,16 @@ public class LfLineReader {
txtLength += appendLength;
}
} while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);
-
- if (bytesConsumed > Integer.MAX_VALUE)
+
+ if (bytesConsumed > Integer.MAX_VALUE) {
throw new IOException("Too many bytes before newline: " + bytesConsumed);
+ }
return (int) bytesConsumed;
}
-
+
/**
* Read from the InputStream into the given Text.
- *
+ *
* @param str
* the object to store the given line
* @param maxLineLength
@@ -156,10 +159,10 @@ public class LfLineReader {
public int readLine(Text str, int maxLineLength) throws IOException {
return readLine(str, maxLineLength, Integer.MAX_VALUE);
}
-
+
/**
* Read from the InputStream into the given Text.
- *
+ *
* @param str
* the object to store the given line
* @return the number of bytes read including the newline
@@ -169,5 +172,5 @@ public class LfLineReader {
public int readLine(Text str) throws IOException {
return readLine(str, Integer.MAX_VALUE, Integer.MAX_VALUE);
}
-
+
}
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java
index 1623d55..4471b10 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java
@@ -23,10 +23,10 @@ import org.apache.accumulo.core.iterators.user.SummingCombiner;
import org.apache.hadoop.io.Text;
public class TextUtil {
-
+
/**
* Appends a null byte followed by the UTF-8 bytes of the given string to the given {@link Text}
- *
+ *
* @param text
* the Text to which to append
* @param string
@@ -36,32 +36,32 @@ public class TextUtil {
appendNullByte(text);
textAppendNoNull(text, string);
}
-
+
public static void textAppend(Text text, String string, boolean replaceBadChar) {
appendNullByte(text);
textAppendNoNull(text, string, replaceBadChar);
}
-
+
public static void textAppend(Text t, long s) {
t.append(nullByte, 0, 1);
t.append(SummingCombiner.FIXED_LEN_ENCODER.encode(s), 0, 8);
}
-
+
private static final byte[] nullByte = {0};
-
+
/**
* Appends a null byte to the given text
- *
+ *
* @param text
* the text to which to append the null byte
*/
public static void appendNullByte(Text text) {
text.append(nullByte, 0, nullByte.length);
}
-
+
/**
* Appends the UTF-8 bytes of the given string to the given {@link Text}
- *
+ *
* @param t
* the Text to which to append
* @param s
@@ -70,13 +70,9 @@ public class TextUtil {
public static void textAppendNoNull(Text t, String s) {
textAppendNoNull(t, s, false);
}
-
+
/**
* Appends the UTF-8 bytes of the given string to the given {@link Text}
- *
- * @param t
- * @param s
- * @param replaceBadChar
*/
public static void textAppendNoNull(Text t, String s, boolean replaceBadChar) {
try {
@@ -86,11 +82,11 @@ public class TextUtil {
throw new IllegalArgumentException(cce);
}
}
-
+
/**
- * Converts the given string its UTF-8 bytes. This uses Hadoop's method for converting string to UTF-8 and is much faster than calling
- * {@link String#getBytes(String)}.
- *
+ * Converts the given string to its UTF-8 bytes. This uses Hadoop's method for converting string to
+ * UTF-8 and is much faster than calling {@link String#getBytes(String)}.
+ *
* @param string
* the string to convert
* @return the UTF-8 representation of the string
diff --git a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java
index f6b2791..777d7b6 100644
--- a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java
+++ b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java
@@ -23,11 +23,10 @@ import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
-import junit.framework.Assert;
-
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.junit.Assert;
import org.junit.Test;
public class WikipediaInputSplitTest {
@@ -44,22 +43,22 @@ public class WikipediaInputSplitTest {
split.write(out);
out.close();
baos.close();
-
+
ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
DataInput in = new ObjectInputStream(bais);
-
+
WikipediaInputSplit split2 = new WikipediaInputSplit();
split2.readFields(in);
Assert.assertTrue(bais.available() == 0);
bais.close();
-
+
Assert.assertTrue(split.getPartition() == split2.getPartition());
-
+
FileSplit fSplit2 = split2.getFileSplit();
Assert.assertTrue(fSplit.getPath().equals(fSplit2.getPath()));
Assert.assertTrue(fSplit.getStart() == fSplit2.getStart());
Assert.assertTrue(fSplit.getLength() == fSplit2.getLength());
-
+
String[] hosts2 = fSplit2.getLocations();
Assert.assertEquals(hosts.length, hosts2.length);
for (int i = 0; i < hosts.length; i++) {
diff --git a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
index 7297b5a..3c87f3f 100644
--- a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
+++ b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
@@ -20,13 +20,12 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-import junit.framework.Assert;
-
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.examples.wikisearch.protobuf.TermWeight;
import org.apache.accumulo.examples.wikisearch.protobuf.TermWeight.Info.Builder;
import org.junit.After;
+import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
@@ -35,72 +34,72 @@ import com.google.protobuf.InvalidProtocolBufferException;
public class TextIndexTest {
private TextIndexCombiner combiner;
private List<Value> values;
-
+
@Before
public void setup() throws Exception {
combiner = new TextIndexCombiner();
combiner.init(null, Collections.singletonMap("all", "true"), null);
- values = new ArrayList<Value>();
+ values = new ArrayList<>();
}
-
+
@After
public void cleanup() {
-
+
}
-
+
private TermWeight.Info.Builder createBuilder() {
return TermWeight.Info.newBuilder();
}
-
+
@Test
public void testSingleValue() throws InvalidProtocolBufferException {
Builder builder = createBuilder();
builder.addWordOffset(1);
builder.addWordOffset(5);
builder.setNormalizedTermFrequency(0.1f);
-
+
values.add(new Value(builder.build().toByteArray()));
-
+
Value result = combiner.reduce(new Key(), values.iterator());
-
+
TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
-
+
Assert.assertTrue(info.getNormalizedTermFrequency() == 0.1f);
-
+
List<Integer> offsets = info.getWordOffsetList();
Assert.assertTrue(offsets.size() == 2);
Assert.assertTrue(offsets.get(0) == 1);
Assert.assertTrue(offsets.get(1) == 5);
}
-
+
@Test
public void testAggregateTwoValues() throws InvalidProtocolBufferException {
Builder builder = createBuilder();
builder.addWordOffset(1);
builder.addWordOffset(5);
builder.setNormalizedTermFrequency(0.1f);
-
+
values.add(new Value(builder.build().toByteArray()));
-
+
builder = createBuilder();
builder.addWordOffset(3);
builder.setNormalizedTermFrequency(0.05f);
-
+
values.add(new Value(builder.build().toByteArray()));
-
+
Value result = combiner.reduce(new Key(), values.iterator());
-
+
TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
-
+
Assert.assertTrue(info.getNormalizedTermFrequency() == 0.15f);
-
+
List<Integer> offsets = info.getWordOffsetList();
Assert.assertTrue(offsets.size() == 3);
Assert.assertTrue(offsets.get(0) == 1);
Assert.assertTrue(offsets.get(1) == 3);
Assert.assertTrue(offsets.get(2) == 5);
}
-
+
@Test
public void testAggregateManyValues() throws InvalidProtocolBufferException {
Builder builder = createBuilder();
@@ -108,28 +107,28 @@ public class TextIndexTest {
builder.addWordOffset(15);
builder.addWordOffset(19);
builder.setNormalizedTermFrequency(0.12f);
-
+
values.add(new Value(builder.build().toByteArray()));
-
+
builder = createBuilder();
builder.addWordOffset(1);
builder.addWordOffset(5);
builder.setNormalizedTermFrequency(0.1f);
-
+
values.add(new Value(builder.build().toByteArray()));
-
+
builder = createBuilder();
builder.addWordOffset(3);
builder.setNormalizedTermFrequency(0.05f);
-
+
values.add(new Value(builder.build().toByteArray()));
-
+
Value result = combiner.reduce(new Key(), values.iterator());
-
+
TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
-
+
Assert.assertTrue(info.getNormalizedTermFrequency() == 0.27f);
-
+
List<Integer> offsets = info.getWordOffsetList();
Assert.assertTrue(offsets.size() == 6);
Assert.assertTrue(offsets.get(0) == 1);
@@ -139,7 +138,7 @@ public class TextIndexTest {
Assert.assertTrue(offsets.get(4) == 15);
Assert.assertTrue(offsets.get(5) == 19);
}
-
+
@Test
public void testEmptyValue() throws InvalidProtocolBufferException {
Builder builder = createBuilder();
@@ -147,32 +146,32 @@ public class TextIndexTest {
builder.addWordOffset(15);
builder.addWordOffset(19);
builder.setNormalizedTermFrequency(0.12f);
-
+
values.add(new Value("".getBytes()));
values.add(new Value(builder.build().toByteArray()));
values.add(new Value("".getBytes()));
-
+
builder = createBuilder();
builder.addWordOffset(1);
builder.addWordOffset(5);
builder.setNormalizedTermFrequency(0.1f);
-
+
values.add(new Value(builder.build().toByteArray()));
values.add(new Value("".getBytes()));
-
+
builder = createBuilder();
builder.addWordOffset(3);
builder.setNormalizedTermFrequency(0.05f);
-
+
values.add(new Value(builder.build().toByteArray()));
values.add(new Value("".getBytes()));
-
+
Value result = combiner.reduce(new Key(), values.iterator());
-
+
TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
-
+
Assert.assertTrue(info.getNormalizedTermFrequency() == 0.27f);
-
+
List<Integer> offsets = info.getWordOffsetList();
Assert.assertTrue(offsets.size() == 6);
Assert.assertTrue(offsets.get(0) == 1);
diff --git a/pom.xml b/pom.xml
index d62928a..fdaba5e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -19,14 +19,14 @@
<modelVersion>4.0.0</modelVersion>
<parent>
- <groupId>org.apache</groupId>
- <artifactId>apache</artifactId>
- <version>18</version>
+ <groupId>org.apache</groupId>
+ <artifactId>apache</artifactId>
+ <version>21</version>
</parent>
<groupId>org.apache.accumulo</groupId>
<artifactId>accumulo-wikisearch</artifactId>
- <version>1.8.0</version>
+ <version>2.0.0-SNAPSHOT</version>
<packaging>pom</packaging>
<name>accumulo-wikisearch</name>
@@ -37,15 +37,18 @@
</modules>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
- <version.accumulo>${project.version}</version.accumulo>
+ <maven.compiler.source>1.8</maven.compiler.source>
+ <maven.compiler.target>1.8</maven.compiler.target>
+ <version.accumulo>2.0.0-alpha-2</version.accumulo>
<version.collections>3.2.1</version.collections>
<version.commons-codec>1.5</version.commons-codec>
+ <version.commons-configuration>1.10</version.commons-configuration>
<version.commons-jexl>2.0.1</version.commons-jexl>
<version.commons-lang>2.4</version.commons-lang>
<version.ejb-spec-api>1.0.1.Final</version.ejb-spec-api>
- <version.guava>11.0.2</version.guava>
- <version.hadoop>2.6.4</version.hadoop>
- <version.htrace>3.1.0-incubating</version.htrace>
+ <version.guava>27.0-jre</version.guava>
+ <version.hadoop>3.1.1</version.hadoop>
+ <version.htrace>3.2.0-incubating</version.htrace>
<version.jaxrs>2.1.0.GA</version.jaxrs>
<version.junit>4.11</version.junit>
<version.kryo>1.04</version.kryo>
@@ -56,50 +59,71 @@
<version.lucene-wikipedia>3.0.2</version.lucene-wikipedia>
<version.minlog>1.2</version.minlog>
<version.protobuf>2.5.0</version.protobuf>
- <version.thrift>0.9.3</version.thrift>
- <version.zookeeper>3.3.1</version.zookeeper>
+ <version.thrift>0.12.0</version.thrift>
+ <version.zookeeper>3.4.14</version.zookeeper>
</properties>
<dependencyManagement>
<dependencies>
- <dependency>
- <groupId>org.apache.accumulo</groupId>
- <artifactId>accumulo-fate</artifactId>
- <version>${version.accumulo}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <version>${version.hadoop}</version>
- </dependency>
- <dependency>
- <groupId>commons-collections</groupId>
- <artifactId>commons-collections</artifactId>
- <version>${version.collections}</version>
- </dependency>
- <dependency>
- <groupId>commons-logging</groupId>
- <artifactId>commons-logging</artifactId>
- <version>1.1.1</version>
- </dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-math3</artifactId>
- <version>3.6.1</version>
- </dependency>
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <version>${version.junit}</version>
- </dependency>
- <dependency>
+ <dependency>
+ <groupId>javaee</groupId>
+ <artifactId>javaee-api</artifactId>
+ <version>5</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>${version.hadoop}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.kerby</groupId>
+ <artifactId>kerb-simplekdc</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.htrace</groupId>
+ <artifactId>htrace-core</artifactId>
+ <version>${version.htrace}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.code.findbugs</groupId>
+ <artifactId>jsr305</artifactId>
+ <version>3.0.2</version>
+ </dependency>
+ <dependency>
+ <groupId>commons-collections</groupId>
+ <artifactId>commons-collections</artifactId>
+ <version>${version.collections}</version>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ <version>2.5</version>
+ </dependency>
+ <dependency>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ <version>1.1.1</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-math3</artifactId>
+ <version>3.6.1</version>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>${version.junit}</version>
+ </dependency>
+ <dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>${version.zookeeper}</version>
<exclusions>
- <exclusion>
- <groupId>jline</groupId>
- <artifactId>jline</artifactId>
- </exclusion>
+ <exclusion>
+ <groupId>jline</groupId>
+ <artifactId>jline</artifactId>
+ </exclusion>
</exclusions>
</dependency>
<dependency>
@@ -149,6 +173,11 @@
<version>${version.commons-codec}</version>
</dependency>
<dependency>
+ <groupId>commons-configuration</groupId>
+ <artifactId>commons-configuration</artifactId>
+ <version>${version.commons-configuration}</version>
+ </dependency>
+ <dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>${version.commons-lang}</version>
@@ -176,6 +205,11 @@
<version>${version.accumulo}</version>
</dependency>
<dependency>
+ <groupId>org.apache.accumulo</groupId>
+ <artifactId>wikisearch-ingest</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-jexl</artifactId>
<version>${version.commons-jexl}</version>
@@ -210,11 +244,11 @@
<artifactId>libthrift</artifactId>
<version>${version.thrift}</version>
<exclusions>
- <!-- excluded to make the enforcer plug in happy-->
- <exclusion>
- <groupId>org.apache.httpcomponents</groupId>
- <artifactId>httpcore</artifactId>
- </exclusion>
+ <!-- excluded to make the enforcer plug in happy-->
+ <exclusion>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpcore</artifactId>
+ </exclusion>
</exclusions>
</dependency>
<dependency>
@@ -232,6 +266,21 @@
<artifactId>jackson-mapper-asl</artifactId>
<version>1.9.13</version>
</dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ <version>2.7.8</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.code.gson</groupId>
+ <artifactId>gson</artifactId>
+ <version>2.8.5</version>
+ </dependency>
+ <dependency>
+ <groupId>commons-beanutils</groupId>
+ <artifactId>commons-beanutils</artifactId>
+ <version>1.9.3</version>
+ </dependency>
</dependencies>
</dependencyManagement>
<repositories>
@@ -239,15 +288,6 @@
<snapshots>
<enabled>false</enabled>
</snapshots>
- <id>central</id>
- <name>Maven Repository Switchboard</name>
- <url>http://repo1.maven.org/maven2</url>
- <layout>default</layout>
- </repository>
- <repository>
- <snapshots>
- <enabled>false</enabled>
- </snapshots>
<id>java.net</id>
<name>java.net</name>
<url>https://maven.java.net/content/groups/public</url>
@@ -255,21 +295,19 @@
</repository>
</repositories>
<build>
- <defaultGoal>package</defaultGoal>
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-ejb-plugin</artifactId>
+ <version>2.1</version>
+ </plugin>
+ </plugins>
+ </pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-compiler-plugin</artifactId>
- <version>3.1</version>
- <configuration>
- <source>1.7</source>
- <target>1.7</target>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
- <version>1.3.1</version>
<executions>
<execution>
<id>enforce-mvn</id>
@@ -287,7 +325,6 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-clean-plugin</artifactId>
- <version>2.5</version>
<configuration>
<filesets>
<fileset>
@@ -302,7 +339,6 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
- <version>2.4</version>
<configuration>
<outputDirectory>lib</outputDirectory>
<archive>
@@ -321,42 +357,25 @@
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-resources-plugin</artifactId>
- <version>2.6</version>
- <configuration>
- <encoding>UTF-8</encoding>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
- <version>2.9.1</version>
<configuration>
- <encoding>UTF-8</encoding>
<quiet>true</quiet>
<jarOutputDirectory>lib</jarOutputDirectory>
<reportOutputDirectory>docs</reportOutputDirectory>
- <javadocVersion>1.6</javadocVersion>
+ <javadocVersion>1.8</javadocVersion>
<additionalJOption>-J-Xmx512m</additionalJOption>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
- <version>2.2.1</version>
<configuration>
<outputDirectory>lib</outputDirectory>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <version>2.17</version>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
- <version>2.8</version>
<inherited>false</inherited>
<executions>
<execution>
@@ -367,7 +386,7 @@
<phase>prepare-package</phase>
<configuration>
<outputDirectory>../../lib</outputDirectory>
- <!-- just grab the non-provided runtime dependencies -->
+ <!-- just grab the non-provided runtime dependencies -->
<includeArtifactIds>commons-collections,commons-configuration,commons-io,commons-lang,jline,log4j,libthrift,commons-jci-core,commons-jci-fam,commons-logging,commons-logging-api</includeArtifactIds>
<excludeGroupIds>accumulo</excludeGroupIds>
<excludeTransitive>true</excludeTransitive>
diff --git a/query-war/pom.xml b/query-war/pom.xml
index 45a1f1b..c1404fd 100644
--- a/query-war/pom.xml
+++ b/query-war/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>org.apache.accumulo</groupId>
<artifactId>accumulo-wikisearch</artifactId>
- <version>1.8.0</version>
+ <version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>wikisearch-query-war</artifactId>
@@ -30,17 +30,7 @@
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-compiler-plugin</artifactId>
- <version>3.1</version>
- <configuration>
- <source>1.6</source>
- <target>1.6</target>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-war-plugin</artifactId>
- <version>2.1.1</version>
<configuration>
<failOnMissingWebXml>true</failOnMissingWebXml>
</configuration>
diff --git a/query/pom.xml b/query/pom.xml
index e022965..48b2477 100644
--- a/query/pom.xml
+++ b/query/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>org.apache.accumulo</groupId>
<artifactId>accumulo-wikisearch</artifactId>
- <version>1.8.0</version>
+ <version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>wikisearch-query</artifactId>
<packaging>ejb</packaging>
@@ -53,7 +53,6 @@
<dependency>
<groupId>commons-configuration</groupId>
<artifactId>commons-configuration</artifactId>
- <version>1.6</version>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
@@ -66,7 +65,6 @@
<dependency>
<groupId>org.apache.accumulo</groupId>
<artifactId>wikisearch-ingest</artifactId>
- <version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
@@ -79,7 +77,6 @@
<dependency>
<groupId>javaee</groupId>
<artifactId>javaee-api</artifactId>
- <version>5</version>
<scope>provided</scope>
</dependency>
<dependency>
@@ -90,23 +87,11 @@
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
- <version>2.4</version>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.accumulo</groupId>
- <artifactId>accumulo-fate</artifactId>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.accumulo</groupId>
- <artifactId>accumulo-trace</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.htrace</groupId>
<artifactId>htrace-core</artifactId>
- <version>${version.htrace}</version>
<scope>runtime</scope>
</dependency>
<dependency>
@@ -155,27 +140,15 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
- <version>2.4</version>
<configuration>
<descriptors>
<descriptor>src/assembly/dist.xml</descriptor>
</descriptors>
- <tarLongFileMode>gnu</tarLongFileMode>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-compiler-plugin</artifactId>
- <version>3.1</version>
- <configuration>
- <source>1.6</source>
- <target>1.6</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-ejb-plugin</artifactId>
- <version>2.1</version>
<configuration>
<ejbVersion>3.1</ejbVersion>
</configuration>
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AbstractEvaluatingIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AbstractEvaluatingIterator.java
index 87b4da2..73319fd 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AbstractEvaluatingIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AbstractEvaluatingIterator.java
@@ -38,33 +38,39 @@ import org.apache.accumulo.examples.wikisearch.parser.QueryEvaluator;
import org.apache.commons.jexl2.parser.ParseException;
import org.apache.log4j.Logger;
-
import com.esotericsoftware.kryo.Kryo;
/**
- *
- * This iterator aggregates rows together using the specified key comparator. Subclasses will provide their own implementation of fillMap which will fill the
- * supplied EventFields object with field names (key) and field values (value). After all fields have been put into the aggregated object (by aggregating all
- * columns with the same key), the EventFields object will be compared against the supplied expression. If the expression returns true, then the return key and
- * return value can be retrieved via getTopKey() and getTopValue().
- *
- * Optionally, the caller can set an expression (field operator value) that should not be evaluated against the event. For example, if the query is
- * "A == 'foo' and B == 'bar'", but for some reason B may not be in the data, then setting the UNEVALUATED_EXPRESSIONS option to "B == 'bar'" will allow the
- * events to be evaluated against the remainder of the expression and still return as true.
- *
- * By default this iterator will return all Events in the shard. If the START_DATE and END_DATE are specified, then this iterator will evaluate the timestamp of
- * the key against the start and end dates. If the event date is not within the range of start to end, then it is skipped.
- *
- * This iterator will return up the stack an EventFields object serialized using Kryo in the cell Value.
- *
+ *
+ * This iterator aggregates rows together using the specified key comparator. Subclasses will
+ * provide their own implementation of fillMap which will fill the supplied EventFields object with
+ * field names (key) and field values (value). After all fields have been put into the aggregated
+ * object (by aggregating all columns with the same key), the EventFields object will be compared
+ * against the supplied expression. If the expression returns true, then the return key and return
+ * value can be retrieved via getTopKey() and getTopValue().
+ *
+ * Optionally, the caller can set an expression (field operator value) that should not be evaluated
+ * against the event. For example, if the query is "A == 'foo' and B == 'bar'", but for some reason
+ * B may not be in the data, then setting the UNEVALUATED_EXPRESSIONS option to "B == 'bar'" will
+ * allow the events to be evaluated against the remainder of the expression and still return as
+ * true.
+ *
+ * By default this iterator will return all Events in the shard. If the START_DATE and END_DATE are
+ * specified, then this iterator will evaluate the timestamp of the key against the start and end
+ * dates. If the event date is not within the range of start to end, then it is skipped.
+ *
+ * This iterator will return up the stack an EventFields object serialized using Kryo in the cell
+ * Value.
+ *
*/
-public abstract class AbstractEvaluatingIterator implements SortedKeyValueIterator<Key,Value>, OptionDescriber {
-
+public abstract class AbstractEvaluatingIterator
+ implements SortedKeyValueIterator<Key,Value>, OptionDescriber {
+
private static Logger log = Logger.getLogger(AbstractEvaluatingIterator.class);
protected static final byte[] NULL_BYTE = new byte[0];
public static final String QUERY_OPTION = "expr";
public static final String UNEVALUTED_EXPRESSIONS = "unevaluated.expressions";
-
+
private PartialKey comparator = null;
protected SortedKeyValueIterator<Key,Value> iterator;
private Key currentKey = new Key();
@@ -76,34 +82,35 @@ public abstract class AbstractEvaluatingIterator implements SortedKeyValueIterat
private static Kryo kryo = new Kryo();
private Range seekRange = null;
private Set<String> skipExpressions = null;
-
+
protected AbstractEvaluatingIterator(AbstractEvaluatingIterator other, IteratorEnvironment env) {
iterator = other.iterator.deepCopy(env);
event = other.event;
}
-
+
public AbstractEvaluatingIterator() {}
-
+
/**
- * Implementations will return the PartialKey value to use for comparing keys for aggregating events
- *
+ * Implementations will return the PartialKey value to use for comparing keys for aggregating
+ * events
+ *
* @return the type of comparator to use
*/
public abstract PartialKey getKeyComparator();
-
+
/**
- * When the query expression evaluates to true against the event, the event fields will be serialized into the Value and returned up the iterator stack.
- * Implemenations will need to provide a key to be used with the event.
- *
- * @param k
+ * When the query expression evaluates to true against the event, the event fields will be
+ * serialized into the Value and returned up the iterator stack. Implemenations will need to
+ * provide a key to be used with the event.
+ *
* @return the key that should be returned with the map of values.
*/
public abstract Key getReturnKey(Key k) throws Exception;
-
+
/**
- * Implementations will need to fill the map with field visibilities, names, and values. When all fields have been aggregated the event will be evaluated
- * against the query expression.
- *
+ * Implementations will need to fill the map with field visibilities, names, and values. When all
+ * fields have been aggregated the event will be evaluated against the query expression.
+ *
* @param event
* Multimap of event names and fields.
* @param key
@@ -112,44 +119,43 @@ public abstract class AbstractEvaluatingIterator implements SortedKeyValueIterat
* current Value
*/
public abstract void fillMap(EventFields event, Key key, Value value) throws Exception;
-
+
/**
- * Provides the ability to skip this key and all of the following ones that match using the comparator.
- *
- * @param key
+ * Provides the ability to skip this key and all of the following ones that match using the
+ * comparator.
+ *
* @return true if the key should be acted upon, otherwise false.
- * @throws IOException
*/
public abstract boolean isKeyAccepted(Key key) throws IOException;
-
+
/**
* Reset state.
*/
public void reset() {
event.clear();
}
-
+
private void aggregateRowColumn(EventFields event) throws IOException {
-
+
currentKey.set(iterator.getTopKey());
-
+
try {
fillMap(event, iterator.getTopKey(), iterator.getTopValue());
iterator.next();
-
+
while (iterator.hasTop() && iterator.getTopKey().equals(currentKey, this.comparator)) {
fillMap(event, iterator.getTopKey(), iterator.getTopValue());
iterator.next();
}
-
+
// Get the return key
returnKey = getReturnKey(currentKey);
} catch (Exception e) {
throw new IOException("Error aggregating event", e);
}
-
+
}
-
+
private void findTop() throws IOException {
do {
reset();
@@ -161,10 +167,10 @@ public abstract class AbstractEvaluatingIterator implements SortedKeyValueIterat
while (iterator.hasTop() && !isKeyAccepted(iterator.getTopKey())) {
iterator.next();
}
-
+
if (iterator.hasTop()) {
aggregateRowColumn(event);
-
+
// Evaluate the event against the expression
if (event.size() > 0 && this.evaluator.evaluate(event)) {
if (log.isDebugEnabled()) {
@@ -191,33 +197,38 @@ public abstract class AbstractEvaluatingIterator implements SortedKeyValueIterat
log.debug("Iterator.hasTop() == false");
}
} while (returnValue == null && iterator.hasTop());
-
+
// Sanity check. Make sure both returnValue and returnKey are null or both are not null
- if (!((returnKey == null && returnValue == null) || (returnKey != null && returnValue != null))) {
+ if (!((returnKey == null && returnValue == null)
+ || (returnKey != null && returnValue != null))) {
log.warn("Key: " + ((returnKey == null) ? "null" : returnKey.toString()));
log.warn("Value: " + ((returnValue == null) ? "null" : returnValue.toString()));
throw new IOException("Return values are inconsistent");
}
}
-
+
+ @Override
public Key getTopKey() {
if (returnKey != null) {
return returnKey;
}
return iterator.getTopKey();
}
-
+
+ @Override
public Value getTopValue() {
if (returnValue != null) {
return returnValue;
}
return iterator.getTopValue();
}
-
+
+ @Override
public boolean hasTop() {
return returnKey != null || iterator.hasTop();
}
-
+
+ @Override
public void next() throws IOException {
if (returnKey != null) {
returnKey = null;
@@ -225,53 +236,55 @@ public abstract class AbstractEvaluatingIterator implements SortedKeyValueIterat
} else if (iterator.hasTop()) {
iterator.next();
}
-
+
findTop();
}
-
+
/**
* Copy of IteratorUtil.maximizeStartKeyTimeStamp due to IllegalAccessError
- *
- * @param range
- * @return
*/
static Range maximizeStartKeyTimeStamp(Range range) {
Range seekRange = range;
-
+
if (range.getStartKey() != null && range.getStartKey().getTimestamp() != Long.MAX_VALUE) {
Key seekKey = new Key(seekRange.getStartKey());
seekKey.setTimestamp(Long.MAX_VALUE);
seekRange = new Range(seekKey, true, range.getEndKey(), range.isEndKeyInclusive());
}
-
+
return seekRange;
}
-
- public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
+
+ @Override
+ public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive)
+ throws IOException {
// do not want to seek to the middle of a value that should be
// aggregated...
-
+
seekRange = maximizeStartKeyTimeStamp(range);
-
+
iterator.seek(seekRange, columnFamilies, inclusive);
findTop();
-
+
if (range.getStartKey() != null) {
- while (hasTop() && getTopKey().equals(range.getStartKey(), this.comparator) && getTopKey().getTimestamp() > range.getStartKey().getTimestamp()) {
+ while (hasTop() && getTopKey().equals(range.getStartKey(), this.comparator)
+ && getTopKey().getTimestamp() > range.getStartKey().getTimestamp()) {
// the value has a more recent time stamp, so
// pass it up
// log.debug("skipping "+getTopKey());
next();
}
-
+
while (hasTop() && range.beforeStartKey(getTopKey())) {
next();
}
}
-
+
}
-
- public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
+
+ @Override
+ public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options,
+ IteratorEnvironment env) throws IOException {
validateOptions(options);
event = new EventFields();
this.comparator = getKeyComparator();
@@ -292,31 +305,36 @@ public abstract class AbstractEvaluatingIterator implements SortedKeyValueIterat
}
EventFields.initializeKryo(kryo);
}
-
+
+ @Override
public IteratorOptions describeOptions() {
- Map<String,String> options = new HashMap<String,String>();
+ Map<String,String> options = new HashMap<>();
options.put(QUERY_OPTION, "query expression");
options.put(UNEVALUTED_EXPRESSIONS, "comma separated list of expressions to skip");
- return new IteratorOptions(getClass().getSimpleName(), "evaluates event objects against an expression", options, null);
+ return new IteratorOptions(getClass().getSimpleName(),
+ "evaluates event objects against an expression", options, null);
}
-
+
+ @Override
public boolean validateOptions(Map<String,String> options) {
- if (!options.containsKey(QUERY_OPTION))
+ if (!options.containsKey(QUERY_OPTION)) {
return false;
- else
+ } else {
this.expression = options.get(QUERY_OPTION);
-
+ }
+
if (options.containsKey(UNEVALUTED_EXPRESSIONS)) {
String expressionList = options.get(UNEVALUTED_EXPRESSIONS);
if (expressionList != null && !expressionList.trim().equals("")) {
- this.skipExpressions = new HashSet<String>();
- for (String e : expressionList.split(","))
+ this.skipExpressions = new HashSet<>();
+ for (String e : expressionList.split(",")) {
this.skipExpressions.add(e);
+ }
}
}
return true;
}
-
+
public String getQueryExpression() {
return this.expression;
}
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
index 47a55e1..3b03823 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java
@@ -35,7 +35,7 @@ import org.apache.hadoop.io.Text;
import org.apache.log4j.Logger;
public class AndIterator implements SortedKeyValueIterator<Key,Value> {
-
+
protected static final Logger log = Logger.getLogger(AndIterator.class);
private TermSource[] sources;
private int sourcesCount = 0;
@@ -49,27 +49,28 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
private Text currentDocID = new Text(emptyByteArray);
private static boolean SEEK_INCLUSIVE = true;
private Text parentEndRow;
-
+
/**
* Used in representing a Term that is intersected on.
*/
protected static class TermSource {
-
+
public SortedKeyValueIterator<Key,Value> iter;
public Text dataLocation;
public Text term;
public boolean notFlag;
private Collection<ByteSequence> seekColumnFamilies;
-
+
private TermSource(TermSource other) {
this(other.iter, other.dataLocation, other.term, other.notFlag);
}
-
+
public TermSource(SortedKeyValueIterator<Key,Value> iter, Text dataLocation, Text term) {
this(iter, dataLocation, term, false);
}
-
- public TermSource(SortedKeyValueIterator<Key,Value> iter, Text dataLocation, Text term, boolean notFlag) {
+
+ public TermSource(SortedKeyValueIterator<Key,Value> iter, Text dataLocation, Text term,
+ boolean notFlag) {
this.iter = iter;
this.dataLocation = dataLocation;
ByteSequence bs = new ArrayByteSequence(dataLocation.getBytes(), 0, dataLocation.getLength());
@@ -77,75 +78,71 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
this.term = term;
this.notFlag = notFlag;
}
-
+
public String getTermString() {
return (this.term == null) ? new String("Iterator") : this.term.toString();
}
}
-
+
/*
- * | Row | Column Family | Column Qualifier | Value
- * | {RowID} | {dataLocation} | {term}\0{dataType}\0{UID} | Empty
+ * | Row | Column Family | Column Qualifier | Value | {RowID} | {dataLocation} |
+ * {term}\0{dataType}\0{UID} | Empty
*/
protected Text getPartition(Key key) {
return key.getRow();
}
-
+
/**
* Returns the given key's dataLocation
- *
- * @param key
+ *
* @return The given key's dataLocation
*/
protected Text getDataLocation(Key key) {
return key.getColumnFamily();
}
-
+
/**
* Returns the given key's term
- *
- * @param key
+ *
* @return The given key's term
*/
protected Text getTerm(Key key) {
int idx = 0;
String sKey = key.getColumnQualifier().toString();
-
+
idx = sKey.indexOf("\0");
return new Text(sKey.substring(0, idx));
}
-
+
/**
* Returns the given key's DocID
- *
- * @param key
+ *
* @return The given key's DocID
*/
protected Text getDocID(Key key) {
int idx = 0;
String sKey = key.getColumnQualifier().toString();
-
+
idx = sKey.indexOf("\0");
return new Text(sKey.substring(idx + 1));
}
-
+
/**
* Returns the given key's UID
- *
- * @param key
+ *
* @return The given key's UID
*/
protected String getUID(Key key) {
int idx = 0;
String sKey = key.getColumnQualifier().toString();
-
+
idx = sKey.indexOf("\0");
return sKey.substring(idx + 1);
}
-
+
/**
* Build a key from the given row and dataLocation
- *
+ *
* @param row
* The desired row
* @param dataLocation
@@ -155,10 +152,10 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
protected Key buildKey(Text row, Text dataLocation) {
return new Key(row, (dataLocation == null) ? nullText : dataLocation);
}
-
+
/**
* Build a key from the given row, dataLocation, and term
- *
+ *
* @param row
* The desired row
* @param dataLocation
@@ -168,12 +165,13 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
* @return A Key object built from the given row, dataLocation, and term.
*/
protected Key buildKey(Text row, Text dataLocation, Text term) {
- return new Key(row, (dataLocation == null) ? nullText : dataLocation, (term == null) ? nullText : term);
+ return new Key(row, (dataLocation == null) ? nullText : dataLocation,
+ (term == null) ? nullText : term);
}
-
+
/**
* Return the key that directly follows the given key
- *
+ *
* @param key
* The key who will be directly before the returned key
* @return The key directly following the given key.
@@ -181,95 +179,102 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
protected Key buildFollowingPartitionKey(Key key) {
return key.followingKey(PartialKey.ROW);
}
-
+
/**
* Empty default constructor
*/
public AndIterator() {}
-
+
+ @Override
public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
return new AndIterator(this, env);
}
-
+
public AndIterator(AndIterator other, IteratorEnvironment env) {
if (other.sources != null) {
sourcesCount = other.sourcesCount;
sources = new TermSource[sourcesCount];
for (int i = 0; i < sourcesCount; i++) {
- sources[i] = new TermSource(other.sources[i].iter.deepCopy(env), other.sources[i].dataLocation, other.sources[i].term);
+ sources[i] = new TermSource(other.sources[i].iter.deepCopy(env),
+ other.sources[i].dataLocation, other.sources[i].term);
}
}
}
-
+
+ @Override
public Key getTopKey() {
return topKey;
}
-
+
+ @Override
public Value getTopValue() {
return value;
}
-
+
+ @Override
public boolean hasTop() {
return currentRow != null;
}
-
+
/**
- * Find the next key in the current TermSource that is at or beyond the cursor (currentRow, currentTerm, currentDocID).
- *
- * @param sourceID
- * The index of the current source in <code>sources</code>
+ * Find the next key in the current TermSource that is at or beyond the cursor (currentRow,
+ * currentTerm, currentDocID).
+ *
+ * The index of the current source in <code>sources</code>
+ *
* @return True if the source advanced beyond the cursor
- * @throws IOException
*/
private boolean seekOneSource(TermSource ts) throws IOException {
/*
- * Within this loop progress must be made in one of the following forms: - currentRow, currentTerm, or curretDocID must be increased - the given source must
- * advance its iterator This loop will end when any of the following criteria are met - the iterator for the given source is pointing to the key
- * (currentRow, columnFamilies[sourceID], currentTerm, currentDocID) - the given source is out of data and currentRow is set to null - the given source has
- * advanced beyond the endRow and currentRow is set to null
+ * Within this loop progress must be made in one of the following forms: - currentRow,
+ * currentTerm, or curretDocID must be increased - the given source must advance its iterator
+ * This loop will end when any of the following criteria are met - the iterator for the given
+ * source is pointing to the key (currentRow, columnFamilies[sourceID], currentTerm,
+ * currentDocID) - the given source is out of data and currentRow is set to null - the given
+ * source has advanced beyond the endRow and currentRow is set to null
*/
-
+
// precondition: currentRow is not null
boolean advancedCursor = false;
-
+
while (true) {
if (ts.iter.hasTop() == false) {
if (log.isDebugEnabled()) {
log.debug("The current iterator no longer has a top");
}
-
+
// If we got to the end of an iterator, found a Match if it's a NOT
if (ts.notFlag) {
break;
}
-
+
currentRow = null;
// setting currentRow to null counts as advancing the cursor
return true;
}
-
+
// check if we're past the end key
int endCompare = -1;
-
+
if (log.isDebugEnabled()) {
log.debug("Current topKey = " + ts.iter.getTopKey());
}
-
+
// we should compare the row to the end of the range
if (overallRange.getEndKey() != null) {
if (log.isDebugEnabled()) {
log.debug("II.seekOneSource overallRange.getEndKey() != null");
}
-
+
endCompare = overallRange.getEndKey().getRow().compareTo(ts.iter.getTopKey().getRow());
-
+
if ((!overallRange.isEndKeyInclusive() && endCompare <= 0) || endCompare < 0) {
if (log.isDebugEnabled()) {
log.debug("II.seekOneSource at the end of the tablet server");
}
-
+
currentRow = null;
-
+
// setting currentRow to null counts as advancing the cursor
return true;
}
@@ -278,81 +283,84 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
log.debug("II.seekOneSource overallRange.getEndKey() == null");
}
}
-
+
// Compare the Row IDs
int partitionCompare = currentRow.compareTo(getPartition(ts.iter.getTopKey()));
if (log.isDebugEnabled()) {
log.debug("Current partition: " + currentRow);
}
-
+
// check if this source is already at or beyond currentRow
// if not, then seek to at least the current row
if (partitionCompare > 0) {
if (log.isDebugEnabled()) {
log.debug("Need to seek to the current row");
-
+
// seek to at least the currentRow
log.debug("ts.dataLocation = " + ts.dataLocation.getBytes());
log.debug("Term = " + new Text(ts.term + "\0" + currentDocID).getBytes());
}
-
- Key seekKey = buildKey(currentRow, ts.dataLocation, nullText);// new Text(ts.term + "\0" + currentDocID));
-
+
+ Key seekKey = buildKey(currentRow, ts.dataLocation, nullText);// new Text(ts.term + "\0" +
+ // currentDocID));
+
if (log.isDebugEnabled()) {
log.debug("Seeking to: " + seekKey);
}
ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, SEEK_INCLUSIVE);
continue;
}
-
+
// check if this source has gone beyond currentRow
// if so, advance currentRow
if (partitionCompare < 0) {
if (log.isDebugEnabled()) {
log.debug("Went too far beyond the currentRow");
}
-
+
if (ts.notFlag) {
break;
}
-
+
currentRow.set(getPartition(ts.iter.getTopKey()));
currentDocID.set(emptyByteArray);
-
+
advancedCursor = true;
continue;
}
-
+
// we have verified that the current source is positioned in currentRow
// now we must make sure we're in the right columnFamily in the current row
if (ts.dataLocation != null) {
int dataLocationCompare = ts.dataLocation.compareTo(getDataLocation(ts.iter.getTopKey()));
-
+
if (log.isDebugEnabled()) {
log.debug("Comparing dataLocations");
log.debug("dataLocation = " + ts.dataLocation);
log.debug("newDataLocation = " + getDataLocation(ts.iter.getTopKey()));
}
-
+
// check if this source is already on the right columnFamily
// if not, then seek forwards to the right columnFamily
if (dataLocationCompare > 0) {
if (log.isDebugEnabled()) {
log.debug("Need to seek to the right dataLocation");
}
-
- Key seekKey = buildKey(currentRow, ts.dataLocation, nullText);// , new Text(ts.term + "\0" + currentDocID));
-
+
+ Key seekKey = buildKey(currentRow, ts.dataLocation, nullText);// , new Text(ts.term + "\0"
+ // + currentDocID));
+
if (log.isDebugEnabled()) {
log.debug("Seeking to: " + seekKey);
}
-
- ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, SEEK_INCLUSIVE);
+
+ ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies,
+ SEEK_INCLUSIVE);
if (!ts.iter.hasTop()) {
currentRow = null;
return true;
}
-
+
continue;
}
// check if this source is beyond the right columnFamily
@@ -361,26 +369,27 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
if (log.isDebugEnabled()) {
log.debug("Went too far beyond the dataLocation");
}
-
+
if (endCompare == 0) {
// we're done
currentRow = null;
-
+
// setting currentRow to null counts as advancing the cursor
return true;
}
-
+
// Seeking beyond the current dataLocation gives a valid negated result
if (ts.notFlag) {
break;
}
-
+
Key seekKey = buildFollowingPartitionKey(ts.iter.getTopKey());
-
+
if (log.isDebugEnabled()) {
log.debug("Seeking to: " + seekKey);
}
- ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, SEEK_INCLUSIVE);
+ ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies,
+ SEEK_INCLUSIVE);
if (!ts.iter.hasTop()) {
currentRow = null;
return true;
@@ -388,164 +397,172 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
continue;
}
}
-
+
// Compare the Terms
int termCompare = ts.term.compareTo(getTerm(ts.iter.getTopKey()));
if (log.isDebugEnabled()) {
log.debug("term = " + ts.term);
log.debug("newTerm = " + getTerm(ts.iter.getTopKey()));
}
-
+
// We need to seek down farther into the data
if (termCompare > 0) {
if (log.isDebugEnabled()) {
log.debug("Need to seek to the right term");
}
- Key seekKey = buildKey(currentRow, ts.dataLocation, new Text(ts.term + "\0"));// new Text(ts.term + "\0" + currentDocID));
-
+ Key seekKey = buildKey(currentRow, ts.dataLocation, new Text(ts.term + "\0"));// new
+ // Text(ts.term
+ // + "\0" +
+ // currentDocID));
+
if (log.isDebugEnabled()) {
log.debug("Seeking to: " + seekKey);
}
-
+
ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, SEEK_INCLUSIVE);
if (!ts.iter.hasTop()) {
currentRow = null;
return true;
}
-
+
// currentTerm = getTerm(ts.iter.getTopKey());
-
+
if (log.isDebugEnabled()) {
log.debug("topKey after seeking to correct term: " + ts.iter.getTopKey());
}
-
+
continue;
}
-
- // We've jumped out of the current term, set the new term as currentTerm and start looking again
+
+ // We've jumped out of the current term, set the new term as currentTerm and start looking
+ // again
if (termCompare < 0) {
if (log.isDebugEnabled()) {
log.debug("TERM: Need to jump to the next row");
}
-
+
if (endCompare == 0) {
currentRow = null;
-
+
return true;
}
-
+
if (ts.notFlag) {
break;
}
-
+
Key seekKey = buildFollowingPartitionKey(ts.iter.getTopKey());
if (log.isDebugEnabled()) {
log.debug("Using this key to find the next key: " + ts.iter.getTopKey());
log.debug("Seeking to: " + seekKey);
}
-
+
ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, SEEK_INCLUSIVE);
-
+
if (!ts.iter.hasTop()) {
currentRow = null;
return true;
}
-
+
currentTerm = getTerm(ts.iter.getTopKey());
-
+
continue;
}
-
+
// Compare the DocIDs
Text docid = getDocID(ts.iter.getTopKey());
int docidCompare = currentDocID.compareTo(docid);
-
+
if (log.isDebugEnabled()) {
log.debug("Comparing DocIDs");
log.debug("currentDocID = " + currentDocID);
log.debug("docid = " + docid);
}
-
+
// The source isn't at the right DOC
if (docidCompare > 0) {
if (log.isDebugEnabled()) {
log.debug("Need to seek to the correct docid");
}
-
+
// seek forwards
- Key seekKey = buildKey(currentRow, ts.dataLocation, new Text(ts.term + "\0" + currentDocID));
-
+ Key seekKey =
+ buildKey(currentRow, ts.dataLocation, new Text(ts.term + "\0" + currentDocID));
+
if (log.isDebugEnabled()) {
log.debug("Seeking to: " + seekKey);
}
-
+
ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, SEEK_INCLUSIVE);
-
+
continue;
}
-
- // if this source has advanced beyond the current column qualifier then advance currentCQ and return true
+
+ // if this source has advanced beyond the current column qualifier then advance currentCQ and
+ // return true
if (docidCompare < 0) {
if (ts.notFlag) {
break;
}
-
+
if (log.isDebugEnabled()) {
- log.debug("We went too far, update the currentDocID to be the location of where were seek'ed to");
+ log.debug(
+ "We went too far, update the currentDocID to be the location of where were seek'ed to");
}
-
+
currentDocID.set(docid);
advancedCursor = true;
break;
}
-
+
// Set the term as currentTerm (in case we found this record on the first try)
currentTerm = getTerm(ts.iter.getTopKey());
-
+
if (log.isDebugEnabled()) {
log.debug("currentTerm = " + currentTerm);
}
-
+
// If we're negated, next() the first TermSource since we guaranteed it was not a NOT term
if (ts.notFlag) {
sources[0].iter.next();
advancedCursor = true;
}
-
+
// If we got here, we have a match
break;
}
-
+
return advancedCursor;
}
-
+
+ @Override
public void next() throws IOException {
if (log.isDebugEnabled()) {
log.debug("In ModifiedIntersectingIterator.next()");
}
-
+
if (currentRow == null) {
return;
}
-
+
// precondition: the current row is set up and the sources all have the same column qualifier
// while we don't have a match, seek in the source with the smallest column qualifier
sources[0].iter.next();
-
+
advanceToIntersection();
-
+
if (hasTop()) {
if (overallRange != null && !overallRange.contains(topKey)) {
topKey = null;
}
}
}
-
+
protected void advanceToIntersection() throws IOException {
if (log.isDebugEnabled()) {
log.debug("In AndIterator.advanceToIntersection()");
}
-
+
boolean cursorChanged = true;
while (cursorChanged) {
// seek all of the sources to at least the highest seen column qualifier in the current row
@@ -561,61 +578,62 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
}
}
}
-
+
topKey = buildKey(currentRow, currentTerm, currentDocID);
-
+
if (log.isDebugEnabled()) {
log.debug("ModifiedIntersectingIterator: Got a match: " + topKey);
}
}
-
+
public static String stringTopKey(SortedKeyValueIterator<Key,Value> iter) {
if (iter.hasTop()) {
return iter.getTopKey().toString();
}
return "";
}
-
+
public static final String columnFamiliesOptionName = "columnFamilies";
public static final String termValuesOptionName = "termValues";
public static final String notFlagsOptionName = "notFlags";
-
+
/**
* Encode a <code>Text</code> array of all the columns to intersect on
- *
+ *
* @param columns
* The columns to be encoded
* @return A Base64 encoded string (using a \n delimiter) of all columns to intersect on.
*/
public static String encodeColumns(Text[] columns) {
StringBuilder sb = new StringBuilder();
- for (int i = 0; i < columns.length; i++) {
- sb.append(new String(Base64.encodeBase64(TextUtil.getBytes(columns[i]))));
+ for (Text column : columns) {
+ sb.append(new String(Base64.encodeBase64(TextUtil.getBytes(column))));
sb.append('\n');
}
return sb.toString();
}
-
+
/**
- * Encode a <code>Text</code> array of all of the terms to intersect on. The terms should match the columns in a one-to-one manner
- *
+ * Encode a <code>Text</code> array of all of the terms to intersect on. The terms should match
+ * the columns in a one-to-one manner
+ *
* @param terms
* The terms to be encoded
* @return A Base64 encoded string (using a \n delimiter) of all terms to intersect on.
*/
public static String encodeTermValues(Text[] terms) {
StringBuilder sb = new StringBuilder();
- for (int i = 0; i < terms.length; i++) {
- sb.append(new String(Base64.encodeBase64(TextUtil.getBytes(terms[i]))));
+ for (Text term : terms) {
+ sb.append(new String(Base64.encodeBase64(TextUtil.getBytes(term))));
sb.append('\n');
}
-
+
return sb.toString();
}
-
+
/**
* Encode an array of <code>booleans</code> denoted which columns are NOT'ed
- *
+ *
* @param flags
* The array of NOTs
* @return A base64 encoded string of which columns are NOT'ed
@@ -631,10 +649,10 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
}
return new String(Base64.encodeBase64(bytes));
}
-
+
/**
* Decode the encoded columns into a <code>Text</code> array
- *
+ *
* @param columns
* The Base64 encoded String of the columns
* @return A Text array of the decoded columns
@@ -645,13 +663,13 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
for (int i = 0; i < columnStrings.length; i++) {
columnTexts[i] = new Text(Base64.decodeBase64(columnStrings[i].getBytes()));
}
-
+
return columnTexts;
}
-
+
/**
* Decode the encoded terms into a <code>Text</code> array
- *
+ *
* @param terms
* The Base64 encoded String of the terms
* @return A Text array of decoded terms.
@@ -662,14 +680,13 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
for (int i = 0; i < termStrings.length; i++) {
termTexts[i] = new Text(Base64.decodeBase64(termStrings[i].getBytes()));
}
-
+
return termTexts;
}
-
+
/**
* Decode the encoded NOT flags into a <code>boolean</code> array
- *
- * @param flags
+ *
* @return A boolean array of decoded NOT flags
*/
public static boolean[] decodeBooleans(String flags) {
@@ -677,7 +694,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
if (flags == null) {
return null;
}
-
+
byte[] bytes = Base64.decodeBase64(flags.getBytes());
boolean[] bFlags = new boolean[bytes.length];
for (int i = 0; i < bytes.length; i++) {
@@ -687,23 +704,25 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
bFlags[i] = false;
}
}
-
+
return bFlags;
}
-
- public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
+
+ @Override
+ public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options,
+ IteratorEnvironment env) throws IOException {
if (log.isDebugEnabled()) {
log.debug("In AndIterator.init()");
}
-
+
Text[] dataLocations = decodeColumns(options.get(columnFamiliesOptionName));
Text[] terms = decodeTermValues(options.get(termValuesOptionName));
boolean[] notFlags = decodeBooleans(options.get(notFlagsOptionName));
-
+
if (terms.length < 2) {
throw new IllegalArgumentException("AndIterator requires two or more columns families");
}
-
+
// Scan the not flags.
// There must be at least one term that isn't negated
// And we are going to re-order such that the first term is not a ! term
@@ -713,8 +732,9 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
notFlags[i] = false;
}
}
-
- // Make sure that the first dataLocation/Term is not a NOT by swapping it with a later dataLocation/Term
+
+ // Make sure that the first dataLocation/Term is not a NOT by swapping it with a later
+ // dataLocation/Term
if (notFlags[0]) {
for (int i = 1; i < notFlags.length; i++) {
if (notFlags[i] == false) {
@@ -722,34 +742,37 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
Text swap = new Text(terms[0]);
terms[0].set(terms[i]);
terms[i].set(swap);
-
+
// Swap the dataLocations
swap.set(dataLocations[0]);
dataLocations[0].set(dataLocations[i]);
dataLocations[i].set(swap);
-
+
// Flip the notFlags
notFlags[0] = false;
notFlags[i] = true;
break;
}
}
-
+
if (notFlags[0]) {
- throw new IllegalArgumentException("AndIterator requires at least one column family without not");
+ throw new IllegalArgumentException(
+ "AndIterator requires at least one column family without not");
}
}
-
+
// Build up the array of sources that are to be intersected
sources = new TermSource[dataLocations.length];
for (int i = 0; i < dataLocations.length; i++) {
sources[i] = new TermSource(source.deepCopy(env), dataLocations[i], terms[i], notFlags[i]);
}
-
+
sourcesCount = dataLocations.length;
}
-
- public void seek(Range range, Collection<ByteSequence> seekColumnFamilies, boolean inclusive) throws IOException {
+
+ @Override
+ public void seek(Range range, Collection<ByteSequence> seekColumnFamilies, boolean inclusive)
+ throws IOException {
if (log.isDebugEnabled()) {
log.debug("In AndIterator.seek()");
log.debug("AndIterator.seek Given range => " + range);
@@ -758,7 +781,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
currentDocID.set(emptyByteArray);
doSeek(range);
}
-
+
private void doSeek(Range range) throws IOException {
overallRange = new Range(range);
@@ -766,7 +789,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
if (range.getEndKey() != null && range.getEndKey().getRow() != null) {
this.parentEndRow = range.getEndKey().getRow();
}
-
+
// seek each of the sources to the right column family within the row given by key
for (int i = 0; i < sourcesCount; i++) {
Key sourceKey;
@@ -775,22 +798,25 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
// Build a key with the DocID if one is given
if (range.getStartKey().getColumnFamily() != null) {
sourceKey = buildKey(getPartition(range.getStartKey()), dataLocation,
- (sources[i].term == null) ? nullText : new Text(sources[i].term + "\0" + range.getStartKey().getColumnFamily()));
+ (sources[i].term == null) ? nullText
+ : new Text(sources[i].term + "\0" + range.getStartKey().getColumnFamily()));
} // Build a key with just the term.
else {
sourceKey = buildKey(getPartition(range.getStartKey()), dataLocation,
(sources[i].term == null) ? nullText : sources[i].term);
}
- if (!range.isStartKeyInclusive())
+ if (!range.isStartKeyInclusive()) {
sourceKey = sourceKey.followingKey(PartialKey.ROW_COLFAM_COLQUAL);
- sources[i].iter.seek(new Range(sourceKey, true, null, false), sources[i].seekColumnFamilies, SEEK_INCLUSIVE);
+ }
+ sources[i].iter.seek(new Range(sourceKey, true, null, false), sources[i].seekColumnFamilies,
+ SEEK_INCLUSIVE);
} else {
sources[i].iter.seek(range, sources[i].seekColumnFamilies, SEEK_INCLUSIVE);
}
}
-
+
advanceToIntersection();
-
+
if (hasTop()) {
if (overallRange != null && !overallRange.contains(topKey)) {
topKey = null;
@@ -800,12 +826,14 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
}
}
}
-
- public void addSource(SortedKeyValueIterator<Key,Value> source, IteratorEnvironment env, Text term, boolean notFlag) {
+
+ public void addSource(SortedKeyValueIterator<Key,Value> source, IteratorEnvironment env,
+ Text term, boolean notFlag) {
addSource(source, env, null, term, notFlag);
}
-
- public void addSource(SortedKeyValueIterator<Key,Value> source, IteratorEnvironment env, Text dataLocation, Text term, boolean notFlag) {
+
+ public void addSource(SortedKeyValueIterator<Key,Value> source, IteratorEnvironment env,
+ Text dataLocation, Text term, boolean notFlag) {
// Check if we have space for the added Source
if (sources == null) {
sources = new TermSource[1];
@@ -821,45 +849,47 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
}
sources = localSources;
}
-
+
sources[sourcesCount] = new TermSource(source.deepCopy(env), dataLocation, term, notFlag);
sourcesCount++;
}
-
+
public boolean jump(Key jumpKey) throws IOException {
if (log.isDebugEnabled()) {
log.debug("jump: " + jumpKey);
}
-
+
// is the jumpKey outside my overall range?
if (parentEndRow != null && parentEndRow.compareTo(jumpKey.getRow()) < 0) {
// can't go there.
if (log.isDebugEnabled()) {
- log.debug("jumpRow: " + jumpKey.getRow() + " is greater than my parentEndRow: " + parentEndRow);
+ log.debug(
+ "jumpRow: " + jumpKey.getRow() + " is greater than my parentEndRow: " + parentEndRow);
}
return false;
}
-
+
if (!hasTop()) {
// TODO: will need to add current/last row if you want to measure if
// we don't have topkey because we hit end of tablet.
-
+
if (log.isDebugEnabled()) {
log.debug("jump called, but topKey is null, must need to move to next row");
}
return false;
} else {
-
+
int comp = this.topKey.getRow().compareTo(jumpKey.getRow());
// compare rows
if (comp > 0) {
if (log.isDebugEnabled()) {
log.debug("jump, our row is ahead of jumpKey.");
- log.debug("jumpRow: " + jumpKey.getRow() + " myRow: " + topKey.getRow() + " parentEndRow" + parentEndRow);
+ log.debug("jumpRow: " + jumpKey.getRow() + " myRow: " + topKey.getRow() + " parentEndRow"
+ + parentEndRow);
}
return hasTop(); // do nothing, we're ahead of jumpKey row
} else if (comp < 0) { // a row behind jump key, need to move forward
-
+
if (log.isDebugEnabled()) {
log.debug("II jump, row jump");
}
@@ -881,14 +911,14 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
} else {
log.debug("myUid: " + myUid);
}
-
+
if (jumpUid == null) {
log.debug("jumpUid is null");
} else {
log.debug("jumpUid: " + jumpUid);
}
}
-
+
int ucomp = myUid.compareTo(jumpUid);
if (ucomp < 0) { // need to move all sources forward
if (log.isDebugEnabled()) {
@@ -898,7 +928,7 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
Range range = new Range(row);
this.currentRow = row;
this.currentDocID = new Text(this.getUID(jumpKey));
-
+
doSeek(range);
// make sure it is in the range if we have one.
@@ -909,8 +939,8 @@ public class AndIterator implements SortedKeyValueIterator<Key,Value> {
log.debug("jump, topKey is now: " + topKey);
}
return hasTop();
-
- }// else do nothing
+
+ } // else do nothing
if (hasTop() && parentEndRow != null && topKey.getRow().compareTo(parentEndRow) > 0) {
topKey = null;
}
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/BooleanLogicIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/BooleanLogicIterator.java
index 1324006..81317e7 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/BooleanLogicIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/BooleanLogicIterator.java
@@ -65,11 +65,12 @@ import org.apache.log4j.Logger;
import com.google.common.collect.Multimap;
public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>, OptionDescriber {
-
- private static final Collection<ByteSequence> EMPTY_COL_FAMS = new ArrayList<ByteSequence>();
+
+ private static final Collection<ByteSequence> EMPTY_COL_FAMS = new ArrayList<>();
protected static final Logger log = Logger.getLogger(BooleanLogicIterator.class);
public static final String QUERY_OPTION = "expr";
- public static final String TERM_CARDINALITIES = "TERM_CARDINALITIES"; // comma separated list of term : count
+ public static final String TERM_CARDINALITIES = "TERM_CARDINALITIES"; // comma separated list of
+ // term : count
public static final String FIELD_INDEX_QUERY = "FIELD_INDEX_QUERY";
public static final String FIELD_NAME_PREFIX = "fi\0";
// --------------------------------------------------------------------------
@@ -80,55 +81,58 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
private SortedKeyValueIterator<Key,Value> sourceIterator;
private BooleanLogicTreeNode root;
private PriorityQueue<BooleanLogicTreeNode> positives;
- private ArrayList<BooleanLogicTreeNode> negatives = new ArrayList<BooleanLogicTreeNode>();
+ private ArrayList<BooleanLogicTreeNode> negatives = new ArrayList<>();
private ArrayList<BooleanLogicTreeNode> rangerators;
private String updatedQuery;
- private Map<String,Long> termCardinalities = new HashMap<String,Long>();
+ private Map<String,Long> termCardinalities = new HashMap<>();
private Range overallRange = null;
private FieldIndexKeyParser keyParser;
-
+
public BooleanLogicIterator() {
keyParser = new FieldIndexKeyParser();
- rangerators = new ArrayList<BooleanLogicTreeNode>();
+ rangerators = new ArrayList<>();
}
-
+
public BooleanLogicIterator(BooleanLogicIterator other, IteratorEnvironment env) {
if (other.sourceIterator != null) {
this.sourceIterator = other.sourceIterator.deepCopy(env);
}
keyParser = new FieldIndexKeyParser();
- rangerators = new ArrayList<BooleanLogicTreeNode>();
+ rangerators = new ArrayList<>();
log.debug("Congratulations, you've reached the BooleanLogicIterator");
}
-
+
public static void setLogLevel(Level lev) {
log.setLevel(lev);
}
-
+
public void setDebug(Level lev) {
log.setLevel(lev);
}
-
+
+ @Override
public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
return new BooleanLogicIterator(this, env);
}
-
+
/**
- * <b>init</b> is responsible for setting up the iterator. It will pull the serialized boolean parse tree from the options mapping and construct the
- * appropriate sub-iterators
- *
- * Once initialized, this iterator will automatically seek to the first matching instance. If no top key exists, that means an event matching the boolean
- * logic did not exist in the partition. Subsequent calls to next will move the iterator and all sub-iterators to the next match.
- *
+ * <b>init</b> is responsible for setting up the iterator. It will pull the serialized boolean
+ * parse tree from the options mapping and construct the appropriate sub-iterators
+ *
+ * Once initialized, this iterator will automatically seek to the first matching instance. If no
+ * top key exists, that means an event matching the boolean logic did not exist in the partition.
+ * Subsequent calls to next will move the iterator and all sub-iterators to the next match.
+ *
* @param source
* The underlying SortedkeyValueIterator.
* @param options
* A Map<String, String> of options.
* @param env
* The iterator environment
- * @throws IOException
*/
- public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
+ @Override
+ public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options,
+ IteratorEnvironment env) throws IOException {
validateOptions(options);
try {
if (log.isDebugEnabled()) {
@@ -136,7 +140,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
// Copy the source iterator
sourceIterator = source.deepCopy(env);
-
+
// Potentially take advantage of term cardinalities
String[] terms = null;
if (null != options.get(TERM_CARDINALITIES)) {
@@ -148,14 +152,14 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
}
}
-
+
// Step 1: Parse the query
if (log.isDebugEnabled()) {
log.debug("QueryParser");
}
QueryParser qp = new QueryParser();
qp.execute(this.updatedQuery); // validateOptions updates the updatedQuery
-
+
// need to build the query tree based on jexl parsing.
// Step 2: refactor QueryTree - inplace modification
if (log.isDebugEnabled()) {
@@ -163,17 +167,17 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
TreeNode tree = qp.getIteratorTree();
this.root = transformTreeNode(tree);
-
+
if (log.isDebugEnabled()) {
log.debug("refactorTree");
}
this.root = refactorTree(this.root);
-
+
if (log.isDebugEnabled()) {
log.debug("collapseBranches");
}
collapseBranches(root);
-
+
// Step 3: create iterators where we need them.
createIteratorTree(this.root);
if (log.isDebugEnabled()) {
@@ -181,18 +185,19 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
// Step 4: split the positive and negative leaves
splitLeaves(this.root);
-
+
} catch (ParseException ex) {
log.error("ParseException in init: " + ex);
throw new IllegalArgumentException("Failed to parse query", ex);
} catch (Exception ex) {
throw new IllegalArgumentException("probably had no indexed terms", ex);
}
-
+
}
-
- /* *************************************************************************
- * Methods for sub iterator creation.
+
+ /*
+ * ************************************************************************* Methods for sub
+ * iterator creation.
*/
private void createIteratorTree(BooleanLogicTreeNode root) throws IOException {
if (log.isDebugEnabled()) {
@@ -201,7 +206,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
// Walk the tree, if all of your children are leaves, roll you into the
// appropriate iterator.
Enumeration<?> dfe = root.depthFirstEnumeration();
-
+
while (dfe.hasMoreElements()) {
BooleanLogicTreeNode node = (BooleanLogicTreeNode) dfe.nextElement();
if (!node.isLeaf() && node.getType() != ParserTreeConstants.JJTJEXLSCRIPT) {
@@ -217,29 +222,32 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
node.setUserObject(createOrIterator(node));
} else {
// throw an error.
- log.debug("createIteratorTree, encounterd a node type I do not know about: " + node.getType());
+ log.debug("createIteratorTree, encounterd a node type I do not know about: "
+ + node.getType());
log.debug("createIteratorTree, node contents: " + node.getContents());
}
node.removeAllChildren();
}
}
}
-
+
// now for remaining leaves, create basic iterators.
// you can add in specialized iterator mappings here if necessary.
dfe = root.depthFirstEnumeration();
while (dfe.hasMoreElements()) {
BooleanLogicTreeNode node = (BooleanLogicTreeNode) dfe.nextElement();
- if (node.isLeaf() && node.getType() != ParserTreeConstants.JJTANDNODE && node.getType() != ParserTreeConstants.JJTORNODE) {
+ if (node.isLeaf() && node.getType() != ParserTreeConstants.JJTANDNODE
+ && node.getType() != ParserTreeConstants.JJTORNODE) {
node.setUserObject(createFieldIndexIterator(node));
}
}
}
-
+
private AndIterator createIntersectingIterator(BooleanLogicTreeNode node) throws IOException {
if (log.isDebugEnabled()) {
log.debug("createIntersectingIterator(node)");
- log.debug("fName: " + node.getFieldName() + " , fValue: " + node.getFieldValue() + " , operator: " + node.getFieldOperator());
+ log.debug("fName: " + node.getFieldName() + " , fValue: " + node.getFieldValue()
+ + " , operator: " + node.getFieldOperator());
}
Text[] columnFamilies = new Text[node.getChildCount()];
Text[] termValues = new Text[node.getChildCount()];
@@ -253,72 +261,77 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
negationMask[i] = child.isNegated();
i++;
}
-
+
AndIterator ii = new AndIterator();
- Map<String,String> options = new HashMap<String,String>();
+ Map<String,String> options = new HashMap<>();
options.put(AndIterator.columnFamiliesOptionName, AndIterator.encodeColumns(columnFamilies));
options.put(AndIterator.termValuesOptionName, AndIterator.encodeTermValues(termValues));
options.put(AndIterator.notFlagsOptionName, AndIterator.encodeBooleans(negationMask));
-
+
ii.init(sourceIterator.deepCopy(env), options, env);
return ii;
}
-
+
private OrIterator createOrIterator(BooleanLogicTreeNode node) throws IOException {
if (log.isDebugEnabled()) {
log.debug("createOrIterator(node)");
- log.debug("fName: " + node.getFieldName() + " , fValue: " + node.getFieldValue() + " , operator: " + node.getFieldOperator());
+ log.debug("fName: " + node.getFieldName() + " , fValue: " + node.getFieldValue()
+ + " , operator: " + node.getFieldOperator());
}
-
+
Enumeration<?> children = node.children();
- ArrayList<Text> fams = new ArrayList<Text>();
- ArrayList<Text> quals = new ArrayList<Text>();
+ ArrayList<Text> fams = new ArrayList<>();
+ ArrayList<Text> quals = new ArrayList<>();
while (children.hasMoreElements()) {
BooleanLogicTreeNode child = (BooleanLogicTreeNode) children.nextElement();
fams.add(child.getFieldName());
quals.add(child.getFieldValue());
}
-
+
OrIterator iter = new OrIterator();
SortedKeyValueIterator<Key,Value> source = sourceIterator.deepCopy(env);
for (int i = 0; i < fams.size(); i++) {
iter.addTerm(source, fams.get(i), quals.get(i), env);
}
-
+
return iter;
}
-
+
/*
- * This takes the place of the SortedKeyIterator used previously. This iterator is bound to the partitioned table structure. When next is called it will jump
- * rows as necessary internally versus needing to do it externally as was the case with the SortedKeyIterator.
+ * This takes the place of the SortedKeyIterator used previously. This iterator is bound to the
+ * partitioned table structure. When next is called it will jump rows as necessary internally
+ * versus needing to do it externally as was the case with the SortedKeyIterator.
*/
- private FieldIndexIterator createFieldIndexIterator(BooleanLogicTreeNode node) throws IOException {
+ private FieldIndexIterator createFieldIndexIterator(BooleanLogicTreeNode node)
+ throws IOException {
if (log.isDebugEnabled()) {
log.debug("BoolLogic.createFieldIndexIterator()");
- log.debug("fName: " + node.getFieldName() + " , fValue: " + node.getFieldValue() + " , operator: " + node.getFieldOperator());
+ log.debug("fName: " + node.getFieldName() + " , fValue: " + node.getFieldValue()
+ + " , operator: " + node.getFieldOperator());
}
Text rowId = null;
sourceIterator.seek(new Range(), EMPTY_COL_FAMS, false);
if (sourceIterator.hasTop()) {
rowId = sourceIterator.getTopKey().getRow();
}
-
- FieldIndexIterator iter = new FieldIndexIterator(node.getType(), rowId, node.getFieldName(), node.getFieldValue(), node.isNegated(),
- node.getFieldOperator());
-
- Map<String,String> options = new HashMap<String,String>();
+
+ FieldIndexIterator iter = new FieldIndexIterator(node.getType(), rowId, node.getFieldName(),
+ node.getFieldValue(), node.isNegated(), node.getFieldOperator());
+
+ Map<String,String> options = new HashMap<>();
iter.init(sourceIterator.deepCopy(env), options, env);
if (log.isDebugEnabled()) {
FieldIndexIterator.setLogLevel(Level.DEBUG);
} else {
FieldIndexIterator.setLogLevel(Level.OFF);
}
-
+
return iter;
}
-
- /* *************************************************************************
- * Methods for testing the tree WRT boolean logic.
+
+ /*
+ * ************************************************************************* Methods for testing
+ * the tree WRT boolean logic.
*/
// After all iterator pointers have been advanced, test if the current
// record passes the boolean logic.
@@ -330,13 +343,14 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
while (dfe.hasMoreElements()) {
BooleanLogicTreeNode node = (BooleanLogicTreeNode) dfe.nextElement();
if (!node.isLeaf()) {
-
+
int type = node.getType();
if (type == ParserTreeConstants.JJTANDNODE) { // BooleanLogicTreeNode.NodeType.AND) {
handleAND(node);
} else if (type == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) {
handleOR(node);
- } else if (type == ParserTreeConstants.JJTJEXLSCRIPT) {// BooleanLogicTreeNode.NodeType.HEAD) {
+ } else if (type == ParserTreeConstants.JJTJEXLSCRIPT) {// BooleanLogicTreeNode.NodeType.HEAD)
+ // {
handleHEAD(node);
} else if (type == ParserTreeConstants.JJTNOTNODE) { // BooleanLogicTreeNode.NodeType.NOT) {
// there should not be any "NOT"s.
@@ -344,14 +358,18 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
} else {
// it is a leaf, if it is an AND or OR do something
- if (node.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) { //OrIterator
+ if (node.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) {
+ // //OrIterator
node.setValid(node.hasTop());
node.reSet();
node.addToSet(node.getTopKey());
-
- } else if (node.getType() == ParserTreeConstants.JJTANDNODE || node.getType() == ParserTreeConstants.JJTEQNODE
- || node.getType() == ParserTreeConstants.JJTERNODE || node.getType() == ParserTreeConstants.JJTLENODE
- || node.getType() == ParserTreeConstants.JJTLTNODE || node.getType() == ParserTreeConstants.JJTGENODE
+
+ } else if (node.getType() == ParserTreeConstants.JJTANDNODE
+ || node.getType() == ParserTreeConstants.JJTEQNODE
+ || node.getType() == ParserTreeConstants.JJTERNODE
+ || node.getType() == ParserTreeConstants.JJTLENODE
+ || node.getType() == ParserTreeConstants.JJTLTNODE
+ || node.getType() == ParserTreeConstants.JJTGENODE
|| node.getType() == ParserTreeConstants.JJTGTNODE) {
// sub iterator guarantees it is in its internal range,
// otherwise, no top.
@@ -359,58 +377,69 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
}
}
-
+
if (log.isDebugEnabled()) {
- log.debug("BoolLogic.testTreeState end, treeState:: " + this.root.getContents() + " , valid: " + root.isValid());
+ log.debug("BoolLogic.testTreeState end, treeState:: " + this.root.getContents()
+ + " , valid: " + root.isValid());
}
return this.root.isValid();
}
-
+
private void handleHEAD(BooleanLogicTreeNode node) {
Enumeration<?> children = node.children();
while (children.hasMoreElements()) {
BooleanLogicTreeNode child = (BooleanLogicTreeNode) children.nextElement();
-
- if (child.getType() == ParserTreeConstants.JJTANDNODE) {// BooleanLogicTreeNode.NodeType.AND) {
+
+ if (child.getType() == ParserTreeConstants.JJTANDNODE) {// BooleanLogicTreeNode.NodeType.AND)
+ // {
node.setValid(child.isValid());
node.setTopKey(child.getTopKey());
- } else if (child.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) {
+ } else if (child.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR)
+ // {
node.setValid(child.isValid());
node.setTopKey(child.getTopKey());
- } else if (child.getType() == ParserTreeConstants.JJTEQNODE || child.getType() == ParserTreeConstants.JJTERNODE
- || child.getType() == ParserTreeConstants.JJTGTNODE || child.getType() == ParserTreeConstants.JJTGENODE
- || child.getType() == ParserTreeConstants.JJTLTNODE || child.getType() == ParserTreeConstants.JJTLENODE) {// BooleanLogicTreeNode.NodeType.SEL) {
+ } else if (child.getType() == ParserTreeConstants.JJTEQNODE
+ || child.getType() == ParserTreeConstants.JJTERNODE
+ || child.getType() == ParserTreeConstants.JJTGTNODE
+ || child.getType() == ParserTreeConstants.JJTGENODE
+ || child.getType() == ParserTreeConstants.JJTLTNODE
+ || child.getType() == ParserTreeConstants.JJTLENODE) {// BooleanLogicTreeNode.NodeType.SEL)
+ // {
node.setValid(true);
node.setTopKey(child.getTopKey());
if (child.getTopKey() == null) {
node.setValid(false);
}
}
- }// end while
-
+ } // end while
+
// I have to be valid AND have a top key
if (node.isValid() && !node.hasTop()) {
node.setValid(false);
}
}
-
+
private void handleAND(BooleanLogicTreeNode me) {
if (log.isDebugEnabled()) {
log.debug("handleAND::" + me.getContents());
}
Enumeration<?> children = me.children();
me.setValid(true); // it's easier to prove false than true
-
- HashSet<Key> goodSet = new HashSet<Key>();
- HashSet<Key> badSet = new HashSet<Key>();
+
+ HashSet<Key> goodSet = new HashSet<>();
+ HashSet<Key> badSet = new HashSet<>();
while (children.hasMoreElements()) {
BooleanLogicTreeNode child = (BooleanLogicTreeNode) children.nextElement();
-
- if (child.getType() == ParserTreeConstants.JJTEQNODE || child.getType() == ParserTreeConstants.JJTANDNODE
- || child.getType() == ParserTreeConstants.JJTERNODE || child.getType() == ParserTreeConstants.JJTNENODE
- || child.getType() == ParserTreeConstants.JJTGENODE || child.getType() == ParserTreeConstants.JJTLENODE
- || child.getType() == ParserTreeConstants.JJTGTNODE || child.getType() == ParserTreeConstants.JJTLTNODE) {
-
+
+ if (child.getType() == ParserTreeConstants.JJTEQNODE
+ || child.getType() == ParserTreeConstants.JJTANDNODE
+ || child.getType() == ParserTreeConstants.JJTERNODE
+ || child.getType() == ParserTreeConstants.JJTNENODE
+ || child.getType() == ParserTreeConstants.JJTGENODE
+ || child.getType() == ParserTreeConstants.JJTLENODE
+ || child.getType() == ParserTreeConstants.JJTGTNODE
+ || child.getType() == ParserTreeConstants.JJTLTNODE) {
+
if (child.isNegated()) {
if (child.hasTop()) {
badSet.add(child.getTopKey());
@@ -436,7 +465,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
me.setValid(false);
return;
}
-
+
// if good set is empty, add it.
if (goodSet.isEmpty()) {
if (log.isDebugEnabled()) {
@@ -448,17 +477,20 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
// if either fails, I'm false.
if (!goodSet.contains(child.getTopKey())) {
if (log.isDebugEnabled()) {
- log.debug("handleAND, goodSet is not empty, and does NOT contain child, setting false. child: " + child.getContents());
+ log.debug(
+ "handleAND, goodSet is not empty, and does NOT contain child, setting false. child: "
+ + child.getContents());
}
me.setValid(false);
return;
} else {
// trim the good set to this one value
// (handles the case were the initial encounters were ORs)
- goodSet = new HashSet<Key>();
+ goodSet = new HashSet<>();
goodSet.add(child.getTopKey());
if (log.isDebugEnabled()) {
- log.debug("handleAND, child in goodset, trim to this value: " + child.getContents());
+ log.debug(
+ "handleAND, child in goodset, trim to this value: " + child.getContents());
}
}
}
@@ -488,9 +520,10 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
}
}
-
- } else if (child.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) {
-
+
+ } else if (child.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR)
+ // {
+
// NOTE: The OR may be an OrIterator in which case it will only produce
// a single unique identifier, or it may be a pure logical construct and
// be capable of producing multiple unique identifiers.
@@ -501,7 +534,8 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
boolean pureNegations = true;
if (!child.isValid()) {
if (log.isDebugEnabled()) {
- log.debug("handleAND, child is an OR and it is not valid, setting false, ALL NEGATED?: " + child.isChildrenAllNegated());
+ log.debug("handleAND, child is an OR and it is not valid, setting false, ALL NEGATED?: "
+ + child.isChildrenAllNegated());
}
me.setValid(false); // I'm an AND if one of my children is false, I'm false.
return;
@@ -537,7 +571,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
}
}
-
+
// is the goodSet still empty? that means were were only negations
// otherwise, if it's not empty and we didn't match one, false
if (child.isNegated()) {
@@ -558,16 +592,16 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
me.setValid(false);
return;
}
-
+
// we matched something, trim the set.
// i.e. two child ORs
goodSet = child.getIntersection(goodSet);
}
}
-
+
}
- }// end while
-
+ } // end while
+
if (goodSet.isEmpty()) { // && log.isDebugEnabled()) {
if (log.isDebugEnabled()) {
log.debug("handleAND-> goodSet is empty, pure negations?");
@@ -579,7 +613,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
}
}
-
+
private void handleOR(BooleanLogicTreeNode me) {
Enumeration<?> children = me.children();
// I'm an OR node, need at least one positive.
@@ -591,12 +625,16 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
// 3 cases for child: SEL, AND, OR
// and negation
BooleanLogicTreeNode child = (BooleanLogicTreeNode) children.nextElement();
- if (child.getType() == ParserTreeConstants.JJTEQNODE || child.getType() == ParserTreeConstants.JJTNENODE
- || child.getType() == ParserTreeConstants.JJTANDNODE || child.getType() == ParserTreeConstants.JJTERNODE
- || child.getType() == ParserTreeConstants.JJTNRNODE || child.getType() == ParserTreeConstants.JJTLENODE
- || child.getType() == ParserTreeConstants.JJTLTNODE || child.getType() == ParserTreeConstants.JJTGENODE
+ if (child.getType() == ParserTreeConstants.JJTEQNODE
+ || child.getType() == ParserTreeConstants.JJTNENODE
+ || child.getType() == ParserTreeConstants.JJTANDNODE
+ || child.getType() == ParserTreeConstants.JJTERNODE
+ || child.getType() == ParserTreeConstants.JJTNRNODE
+ || child.getType() == ParserTreeConstants.JJTLENODE
+ || child.getType() == ParserTreeConstants.JJTLTNODE
+ || child.getType() == ParserTreeConstants.JJTGENODE
|| child.getType() == ParserTreeConstants.JJTGTNODE) {
-
+
if (child.hasTop()) {
if (child.isNegated()) {
// do nothing.
@@ -612,8 +650,9 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
// that child could be pure negations in which case I'm true
me.setValid(child.isValid());
}
-
- } else if (child.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) {
+
+ } else if (child.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR)
+ // {
if (child.hasTop()) {
if (!child.isNegated()) {
allNegated = false;
@@ -634,8 +673,8 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
}
}
- }// end while
-
+ } // end while
+
if (allNegated) {
// do all my children have top?
children = me.children();
@@ -648,7 +687,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
}
me.setValid(false);
-
+
} else {
Key k = me.getMinUniqueID();
if (k == null) {
@@ -659,9 +698,9 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
}
}
-
- /* *************************************************************************
- * Utility methods.
+
+ /*
+ * ************************************************************************* Utility methods.
*/
// Transforms the TreeNode tree of query.parser into the
// BooleanLogicTreeNodeJexl form.
@@ -670,11 +709,11 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
if (log.isDebugEnabled()) {
log.debug("Equals Node");
}
-
+
Multimap<String,QueryTerm> terms = node.getTerms();
for (String fName : terms.keySet()) {
Collection<QueryTerm> values = terms.get(fName);
-
+
for (QueryTerm t : values) {
if (null == t || null == t.getValue()) {
continue;
@@ -682,21 +721,22 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
String fValue = t.getValue().toString();
fValue = fValue.replace("'", "");
boolean negated = t.getOperator().equals("!=");
-
+
if (!fName.startsWith(FIELD_NAME_PREFIX)) {
fName = FIELD_NAME_PREFIX + fName;
}
- BooleanLogicTreeNode child = new BooleanLogicTreeNode(ParserTreeConstants.JJTEQNODE, fName, fValue, negated);
+ BooleanLogicTreeNode child =
+ new BooleanLogicTreeNode(ParserTreeConstants.JJTEQNODE, fName, fValue, negated);
return child;
}
}
}
-
+
if (node.getType().equals(ASTERNode.class) || node.getType().equals(ASTNRNode.class)) {
if (log.isDebugEnabled()) {
log.debug("Regex Node");
}
-
+
Multimap<String,QueryTerm> terms = node.getTerms();
for (String fName : terms.keySet()) {
Collection<QueryTerm> values = terms.get(fName);
@@ -707,23 +747,24 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
String fValue = t.getValue().toString();
fValue = fValue.replaceAll("'", "");
boolean negated = node.getType().equals(ASTNRNode.class);
-
+
if (!fName.startsWith(FIELD_NAME_PREFIX)) {
fName = FIELD_NAME_PREFIX + fName;
}
-
- BooleanLogicTreeNode child = new BooleanLogicTreeNode(ParserTreeConstants.JJTERNODE, fName, fValue, negated);
+
+ BooleanLogicTreeNode child =
+ new BooleanLogicTreeNode(ParserTreeConstants.JJTERNODE, fName, fValue, negated);
return child;
}
}
}
-
- if (node.getType().equals(ASTLTNode.class) || node.getType().equals(ASTLENode.class) || node.getType().equals(ASTGTNode.class)
- || node.getType().equals(ASTGENode.class)) {
+
+ if (node.getType().equals(ASTLTNode.class) || node.getType().equals(ASTLENode.class)
+ || node.getType().equals(ASTGTNode.class) || node.getType().equals(ASTGENode.class)) {
Multimap<String,QueryTerm> terms = node.getTerms();
for (String fName : terms.keySet()) {
Collection<QueryTerm> values = terms.get(fName);
-
+
if (!fName.startsWith(FIELD_NAME_PREFIX)) {
fName = FIELD_NAME_PREFIX + fName;
}
@@ -733,9 +774,10 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
String fValue = t.getValue().toString();
fValue = fValue.replaceAll("'", "").toLowerCase();
- boolean negated = false; // to be negated, must be child of Not, which is handled elsewhere.
+ boolean negated = false; // to be negated, must be child of Not, which is handled
+ // elsewhere.
int mytype = JexlOperatorConstants.getJJTNodeType(t.getOperator());
-
+
BooleanLogicTreeNode child = new BooleanLogicTreeNode(mytype, fName, fValue, negated);
if (log.isDebugEnabled()) {
log.debug("adding child node: " + child.getContents());
@@ -744,11 +786,12 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
}
}
-
+
BooleanLogicTreeNode returnNode = null;
-
+
if (node.getType().equals(ASTAndNode.class) || node.getType().equals(ASTOrNode.class)) {
- int parentType = node.getType().equals(ASTAndNode.class) ? ParserTreeConstants.JJTANDNODE : ParserTreeConstants.JJTORNODE;
+ int parentType = node.getType().equals(ASTAndNode.class) ? ParserTreeConstants.JJTANDNODE
+ : ParserTreeConstants.JJTORNODE;
if (log.isDebugEnabled()) {
log.debug("AND/OR node: " + parentType);
}
@@ -777,7 +820,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
} else {
returnNode = new BooleanLogicTreeNode(parentType);
-
+
}
} else if (node.getType().equals(ASTNotNode.class)) {
if (log.isDebugEnabled()) {
@@ -788,7 +831,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
Multimap<String,QueryTerm> terms = node.getTerms();
for (String fName : terms.keySet()) {
Collection<QueryTerm> values = terms.get(fName);
-
+
if (!fName.startsWith(FIELD_NAME_PREFIX)) {
fName = FIELD_NAME_PREFIX + fName;
}
@@ -800,7 +843,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
fValue = fValue.replaceAll("'", "").toLowerCase();
boolean negated = !t.getOperator().equals("!=");
int mytype = JexlOperatorConstants.getJJTNodeType(t.getOperator());
-
+
if (!fName.startsWith(FIELD_NAME_PREFIX)) {
fName = FIELD_NAME_PREFIX + fName;
}
@@ -810,8 +853,9 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
} else {
returnNode = new BooleanLogicTreeNode(ParserTreeConstants.JJTNOTNODE);
}
- } else if (node.getType().equals(ASTJexlScript.class) || node.getType().getSimpleName().equals("RootNode")) {
-
+ } else if (node.getType().equals(ASTJexlScript.class)
+ || node.getType().getSimpleName().equals("RootNode")) {
+
if (log.isDebugEnabled()) {
log.debug("ROOT/JexlScript node");
}
@@ -821,7 +865,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
Multimap<String,QueryTerm> terms = node.getTerms();
for (String fName : terms.keySet()) {
Collection<QueryTerm> values = terms.get(fName);
-
+
if (!fName.startsWith(FIELD_NAME_PREFIX)) {
fName = FIELD_NAME_PREFIX + fName;
}
@@ -833,7 +877,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
fValue = fValue.replaceAll("'", "").toLowerCase();
boolean negated = t.getOperator().equals("!=");
int mytype = JexlOperatorConstants.getJJTNodeType(t.getOperator());
-
+
BooleanLogicTreeNode child = new BooleanLogicTreeNode(mytype, fName, fValue, negated);
returnNode.add(child);
return returnNode;
@@ -843,38 +887,41 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
returnNode = new BooleanLogicTreeNode(ParserTreeConstants.JJTJEXLSCRIPT);
}
} else {
- log.error("Currently Unsupported Node type: " + node.getClass().getName() + " \t" + node.getType());
+ log.error(
+ "Currently Unsupported Node type: " + node.getClass().getName() + " \t" + node.getType());
}
for (TreeNode child : node.getChildren()) {
returnNode.add(transformTreeNode(child));
}
-
+
return returnNode;
}
-
+
// After tree conflicts have been resolve, we can collapse branches where
// leaves have been pruned.
public static void collapseBranches(BooleanLogicTreeNode myroot) throws Exception {
-
+
// NOTE: doing a depth first enumeration didn't wory when I started
// removing nodes halfway through. The following method does work,
// it's essentially a reverse breadth first traversal.
- List<BooleanLogicTreeNode> nodes = new ArrayList<BooleanLogicTreeNode>();
+ List<BooleanLogicTreeNode> nodes = new ArrayList<>();
Enumeration<?> bfe = myroot.breadthFirstEnumeration();
-
+
while (bfe.hasMoreElements()) {
BooleanLogicTreeNode node = (BooleanLogicTreeNode) bfe.nextElement();
nodes.add(node);
}
-
+
// walk backwards
for (int i = nodes.size() - 1; i >= 0; i--) {
BooleanLogicTreeNode node = nodes.get(i);
if (log.isDebugEnabled()) {
- log.debug("collapseBranches, inspecting node: " + node.toString() + " " + node.printNode());
+ log.debug(
+ "collapseBranches, inspecting node: " + node.toString() + " " + node.printNode());
}
-
- if (node.getType() == ParserTreeConstants.JJTANDNODE || node.getType() == ParserTreeConstants.JJTORNODE) {
+
+ if (node.getType() == ParserTreeConstants.JJTANDNODE
+ || node.getType() == ParserTreeConstants.JJTORNODE) {
if (node.getChildCount() == 0 && !node.isRangeNode()) {
node.removeFromParent();
} else if (node.getChildCount() == 1) {
@@ -882,7 +929,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
BooleanLogicTreeNode c = (BooleanLogicTreeNode) node.getFirstChild();
node.removeFromParent();
p.add(c);
-
+
}
} else if (node.getType() == ParserTreeConstants.JJTJEXLSCRIPT) {
if (node.getChildCount() == 0) {
@@ -893,26 +940,27 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
}
}
-
+
}
-
+
public BooleanLogicTreeNode refactorTree(BooleanLogicTreeNode myroot) {
- List<BooleanLogicTreeNode> nodes = new ArrayList<BooleanLogicTreeNode>();
+ List<BooleanLogicTreeNode> nodes = new ArrayList<>();
Enumeration<?> bfe = myroot.breadthFirstEnumeration();
-
+
while (bfe.hasMoreElements()) {
BooleanLogicTreeNode node = (BooleanLogicTreeNode) bfe.nextElement();
nodes.add(node);
}
-
+
// walk backwards
for (int i = nodes.size() - 1; i >= 0; i--) {
BooleanLogicTreeNode node = nodes.get(i);
- if (node.getType() == ParserTreeConstants.JJTANDNODE || node.getType() == ParserTreeConstants.JJTORNODE) {
+ if (node.getType() == ParserTreeConstants.JJTANDNODE
+ || node.getType() == ParserTreeConstants.JJTORNODE) {
// 1. check to see if all children are negated
// 2. check to see if we have to handle ranges.
-
- Map<Text,RangeBounds> ranges = new HashMap<Text,RangeBounds>();
+
+ Map<Text,RangeBounds> ranges = new HashMap<>();
Enumeration<?> children = node.children();
boolean allNegated = true;
while (children.hasMoreElements()) {
@@ -921,7 +969,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
allNegated = false;
// break;
}
-
+
// currently we are not allowing unbounded ranges, so they must sit under an AND node.
if (node.getType() == ParserTreeConstants.JJTANDNODE) {
// check for ranges
@@ -979,10 +1027,10 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
if (allNegated) {
node.setChildrenAllNegated(true);
}
-
+
// see if the AND node had a range.
if (node.getType() == ParserTreeConstants.JJTANDNODE) {
-
+
// if(ranges.containsKey(node.getFieldName())){
if (!ranges.isEmpty()) {
// we have a range, process it
@@ -994,7 +1042,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
node.setType(ParserTreeConstants.JJTORNODE);
node.removeAllChildren();
// RangeBounds rb = ranges.get(node.getFieldName());
-
+
for (Entry<Text,RangeBounds> entry : ranges.entrySet()) {
Text fName = entry.getKey();
RangeBounds rb = entry.getValue();
@@ -1004,41 +1052,44 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
node.setUpperBound(rb.getUpper());
node.setRangeNode(true);
}
-
+
rangerators.add(node);
-
+
if (log.isDebugEnabled()) {
log.debug("refactor: " + node.getContents());
log.debug("refactor: " + node.getLowerBound() + " " + node.getUpperBound());
}
-
+
} else {
if (log.isDebugEnabled()) {
log.debug("AND range more than 2 children");
}
// node has range plus other children, create another node from the range
// remove lt,le,gt,ge from parent and push in a single node
-
+
// removing nodes via enumeration doesn't work, push into a list
// and walk backwards
- List<BooleanLogicTreeNode> temp = new ArrayList<BooleanLogicTreeNode>();
+ List<BooleanLogicTreeNode> temp = new ArrayList<>();
Enumeration<?> e = node.children();
while (e.hasMoreElements()) {
BooleanLogicTreeNode c = (BooleanLogicTreeNode) e.nextElement();
temp.add(c);
}
-
+
for (int j = temp.size() - 1; j >= 0; j--) {
BooleanLogicTreeNode c = temp.get(j);
- if (c.getType() == JexlOperatorConstants.JJTLENODE || c.getType() == JexlOperatorConstants.JJTLTNODE
- || c.getType() == JexlOperatorConstants.JJTGENODE || c.getType() == JexlOperatorConstants.JJTGTNODE) {
+ if (c.getType() == JexlOperatorConstants.JJTLENODE
+ || c.getType() == JexlOperatorConstants.JJTLTNODE
+ || c.getType() == JexlOperatorConstants.JJTGENODE
+ || c.getType() == JexlOperatorConstants.JJTGTNODE) {
c.removeFromParent();
}
}
-
+
for (Entry<Text,RangeBounds> entry : ranges.entrySet()) {
Text fName = entry.getKey();
- BooleanLogicTreeNode nchild = new BooleanLogicTreeNode(ParserTreeConstants.JJTORNODE, fName.toString(), "");
+ BooleanLogicTreeNode nchild =
+ new BooleanLogicTreeNode(ParserTreeConstants.JJTORNODE, fName.toString(), "");
RangeBounds rb = entry.getValue();
nchild.setFieldValue(new Text(""));
nchild.setLowerBound(rb.getLower());
@@ -1047,21 +1098,21 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
node.add(nchild);
rangerators.add(nchild);
}
-
+
if (log.isDebugEnabled()) {
log.debug("refactor: " + node.getContents());
}
}
}
}
-
+
}
}
-
+
return myroot;
-
+
}
-
+
// If all children are of type SEL, roll this up into an AND or OR node.
private static boolean canRollUp(BooleanLogicTreeNode parent) {
if (log.isDebugEnabled()) {
@@ -1076,21 +1127,23 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
Enumeration<?> e = parent.children();
while (e.hasMoreElements()) {
BooleanLogicTreeNode child = (BooleanLogicTreeNode) e.nextElement();
-
+
if (child.getType() != ParserTreeConstants.JJTEQNODE) {// BooleanLogicTreeNode.NodeType.SEL) {
if (log.isDebugEnabled()) {
- log.debug("canRollUp: child.getType -> " + ParserTreeConstants.jjtNodeName[child.getType()] + " int: " + child.getType() + " return false");
+ log.debug(
+ "canRollUp: child.getType -> " + ParserTreeConstants.jjtNodeName[child.getType()]
+ + " int: " + child.getType() + " return false");
}
return false;
}
-
+
if (child.isNegated()) {
if (log.isDebugEnabled()) {
log.debug("canRollUp: child.isNegated, return false");
}
return false;
}
-
+
if (child.getFieldValue().toString().contains("*")) {
if (log.isDebugEnabled()) {
log.debug("canRollUp: child has wildcard: " + child.getFieldValue());
@@ -1100,10 +1153,11 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
return true;
}
-
+
/**
- * Small utility function to print out the depth-first enumeration of the tree. Specify the root or sub root of the tree you wish to view.
- *
+ * Small utility function to print out the depth-first enumeration of the tree. Specify the root
+ * or sub root of the tree you wish to view.
+ *
* @param root
* The root node of the tree or sub-tree.
*/
@@ -1117,7 +1171,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
System.out.println(i + " : " + n);
}
}
-
+
public static void showBreadthFirstTraversal(BooleanLogicTreeNode root) {
System.out.println("BreadthFirstTraversal");
log.debug("BooleanLogicIterator.showBreadthFirstTraversal()");
@@ -1130,19 +1184,19 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
log.debug(i + " : " + n);
}
}
-
+
private void splitLeaves(BooleanLogicTreeNode node) {
if (log.isDebugEnabled()) {
log.debug("BoolLogic: splitLeaves()");
}
- positives = new PriorityQueue<BooleanLogicTreeNode>(10, new BooleanLogicTreeNodeComparator());
+ positives = new PriorityQueue<>(10, new BooleanLogicTreeNodeComparator());
// positives = new ArrayList<BooleanLogicTreeNodeJexl>();
negatives.clear();
-
+
Enumeration<?> dfe = node.depthFirstEnumeration();
while (dfe.hasMoreElements()) {
BooleanLogicTreeNode elem = (BooleanLogicTreeNode) dfe.nextElement();
-
+
if (elem.isLeaf()) {
if (elem.isNegated()) {
negatives.add(elem);
@@ -1152,7 +1206,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
}
}
-
+
private void reHeapPriorityQueue(BooleanLogicTreeNode node) {
positives.clear();
Enumeration<?> dfe = node.depthFirstEnumeration();
@@ -1165,23 +1219,27 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
}
- /* *************************************************************************
- * The iterator interface methods.
+ /*
+ * ************************************************************************* The iterator
+ * interface methods.
*/
+ @Override
public boolean hasTop() {
return (topKey != null);
}
-
+
+ @Override
public Key getTopKey() {
if (log.isDebugEnabled()) {
log.debug("getTopKey: " + topKey);
}
return topKey;
}
-
+
private void setTopKey(Key key) {
if (this.overallRange != null && key != null) {
- if (overallRange.getEndKey() != null) { // if null end key, that means range is to the end of the tablet.
+ if (overallRange.getEndKey() != null) { // if null end key, that means range is to the end of
+ // the tablet.
if (!this.overallRange.contains(key)) {
topKey = null;
return;
@@ -1190,21 +1248,22 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
topKey = key;
}
-
+
+ @Override
public Value getTopValue() {
if (topValue == null) {
topValue = new Value(new byte[0]);
}
return topValue;
}
-
+
private void resetNegatives() {
for (BooleanLogicTreeNode neg : negatives) {
neg.setTopKey(null);
neg.setValid(true);
}
}
-
+
private String getEventKeyUid(Key k) {
if (k == null || k.getColumnFamily() == null) {
return null;
@@ -1212,7 +1271,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
return k.getColumnFamily().toString();
}
}
-
+
private String getIndexKeyUid(Key k) {
try {
int idx = 0;
@@ -1223,17 +1282,18 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
return null;
}
}
-
+
/*
- * Remember, the Key in the BooleanLogicTreeNode is different structurally than the Key in its sub iterator because the key BooleanLogic needs to return is an
- * event key created from the index key (which is what the sub iterators are looking at!)
+ * Remember, the Key in the BooleanLogicTreeNode is different structurally than the Key in its sub
+ * iterator because the key BooleanLogic needs to return is an event key created from the index
+ * key (which is what the sub iterators are looking at!)
*/
private Key getOptimizedAdvanceKey() throws IOException {
if (log.isDebugEnabled()) {
log.debug("getOptimizedAdvanceKey() called");
}
Enumeration<?> bfe = root.breadthFirstEnumeration();
- ArrayList<BooleanLogicTreeNode> bfl = new ArrayList<BooleanLogicTreeNode>();
+ ArrayList<BooleanLogicTreeNode> bfl = new ArrayList<>();
while (bfe.hasMoreElements()) {
BooleanLogicTreeNode node = (BooleanLogicTreeNode) bfe.nextElement();
if (!node.isNegated()) {
@@ -1242,7 +1302,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
bfl.add(node);
}
}
-
+
// walk the tree backwards
for (int i = bfl.size() - 1; i >= 0; i--) {
if (bfl.get(i).isLeaf() || bfl.get(i).isNegated()) {
@@ -1251,7 +1311,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
continue;
}
-
+
BooleanLogicTreeNode node = bfl.get(i);
node.setDone(false);
if (log.isDebugEnabled()) {
@@ -1264,11 +1324,11 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
boolean firstTime = true;
while (children.hasMoreElements()) {
BooleanLogicTreeNode child = (BooleanLogicTreeNode) children.nextElement();
-
+
if (child.isNegated() || child.isChildrenAllNegated()) {
continue;
}
-
+
// all advance keys were initially set from topkey for the leaves.
if (child.getAdvanceKey() == null) {
log.debug("\tchild does not advance key: " + child.printNode());
@@ -1278,7 +1338,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
} else {
log.debug("\tchild advanceKey: " + child.getAdvanceKey());
}
-
+
if (firstTime) {
firstTime = false;
max = child;
@@ -1287,10 +1347,10 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
continue;
}
-
+
log.debug("\tAND block, max: " + max);
log.debug("\tAND block, child: " + child);
-
+
// first test row
if (max.getAdvanceKey().getRow().compareTo(child.getAdvanceKey().getRow()) < 0) {
max = child;
@@ -1299,7 +1359,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
continue;
}
-
+
// if rows are equal, test uids
String uid_max = getEventKeyUid(max.getAdvanceKey());
String uid_child = getEventKeyUid(child.getAdvanceKey());
@@ -1315,7 +1375,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
log.debug("\tuid_child: " + uid_child);
}
}
-
+
if (uid_max != null && uid_child != null) {
if (uid_max.compareTo(uid_child) < 0) {
max = child;
@@ -1340,7 +1400,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
node.setDone(true);
}
-
+
} else if (node.getType() == ParserTreeConstants.JJTORNODE) {
// get min
BooleanLogicTreeNode min = null;
@@ -1348,10 +1408,10 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
boolean firstTime = true;
int numChildren = node.getChildCount();
int allChildrenDone = 0;
-
+
while (children.hasMoreElements()) {
BooleanLogicTreeNode child = (BooleanLogicTreeNode) children.nextElement();
-
+
if (log.isDebugEnabled()) {
log.debug("\tOR block start, child: " + child);
}
@@ -1376,16 +1436,17 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
break;
}
}
-
+
if (child.getAdvanceKey() == null) {
log.debug("\tOR child doesn't have top or an AdvanceKey");
continue;
}
if (firstTime) {
if (log.isDebugEnabled()) {
- log.debug("\tOR block, first valid node, min=child: " + child + " advanceKey: " + child.getAdvanceKey());
+ log.debug("\tOR block, first valid node, min=child: " + child + " advanceKey: "
+ + child.getAdvanceKey());
}
-
+
firstTime = false;
min = child;
continue;
@@ -1394,21 +1455,22 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
log.debug("\tOR block, min: " + min);
log.debug("\tOR block, child: " + child);
}
- if (min.getAdvanceKey().getRow().toString().compareTo(child.getAdvanceKey().getRow().toString()) > 0) {
+ if (min.getAdvanceKey().getRow().toString()
+ .compareTo(child.getAdvanceKey().getRow().toString()) > 0) {
// child row is less than min, set min to child
min = child;
if (log.isDebugEnabled()) {
log.debug("\tmin row was greater than child, min=child: " + min);
}
continue;
-
+
} else if (min.getAdvanceKey().getRow().compareTo(child.getAdvanceKey().getRow()) < 0) {
// min row is less child, skip
if (log.isDebugEnabled()) {
log.debug("\tmin row less than childs, keep min: " + min);
}
continue;
-
+
} else { // they're equal, test uids
String uid_min = getEventKeyUid(min.getAdvanceKey());
String uid_child = getEventKeyUid(child.getAdvanceKey());
@@ -1417,7 +1479,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
if (uid_min != null && uid_child != null) {
if (uid_min.compareTo(uid_child) > 0) {
-
+
min = child;
if (log.isDebugEnabled()) {
log.debug("\tuid_min > uid_child, set min to child: " + min);
@@ -1430,11 +1492,11 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
min = child;
}
}
- }// end while
+ } // end while
if (log.isDebugEnabled()) {
log.debug("attemptOptimization: OR with children, min: " + min);
}
-
+
if (min != null) {
if (log.isDebugEnabled()) {
log.debug("OR block, min != null, advanceKey? " + min.getAdvanceKey());
@@ -1445,13 +1507,13 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
node.setAdvanceKey(null);
node.setDone(true);
}
-
+
} else if (node.getType() == ParserTreeConstants.JJTJEXLSCRIPT) { // HEAD node
if (log.isDebugEnabled()) {
log.debug("getOptimizedAdvanceKey, HEAD node");
}
BooleanLogicTreeNode child = (BooleanLogicTreeNode) node.getFirstChild();
-
+
if (child.isDone()) {
if (log.isDebugEnabled()) {
log.debug("Head node's child is done, need to move to the next row");
@@ -1472,21 +1534,23 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
k = new Key(row);
child.setAdvanceKey(k);
}
-
+
}
if (log.isDebugEnabled()) {
log.debug("advance Key: " + child.getAdvanceKey());
}
- Key key = new Key(child.getAdvanceKey().getRow(), child.getAdvanceKey().getColumnFamily(), child.getAdvanceKey().getColumnFamily());
+ Key key = new Key(child.getAdvanceKey().getRow(), child.getAdvanceKey().getColumnFamily(),
+ child.getAdvanceKey().getColumnFamily());
return key;
-
- }// end else
- }// end for
+
+ } // end else
+ } // end for
return null;
}
-
+
/*
- * The incoming jump key has been formatted into the structure of an index key, but the leaves are eventkeys
+ * The incoming jump key has been formatted into the structure of an index key, but the leaves are
+ * eventkeys
*/
private boolean jump(Key jumpKey) throws IOException {
if (log.isDebugEnabled()) {
@@ -1497,7 +1561,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
BooleanLogicTreeNode n = (BooleanLogicTreeNode) bfe.nextElement();
n.setAdvanceKey(null);
} // now advance all nodes to the advance key
-
+
if (log.isDebugEnabled()) {
log.debug("jump, All leaves need to advance to: " + jumpKey);
}
@@ -1512,7 +1576,8 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
return ok;
}
-
+
+ @Override
@SuppressWarnings("unused")
public void next() throws IOException {
if (log.isDebugEnabled()) {
@@ -1524,12 +1589,12 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
setTopKey(null);
return;
}
-
+
Key previousJumpKey = null;
while (!finished) {
-
+
Key jumpKey = this.getOptimizedAdvanceKey();
-
+
if (jumpKey == null) { // stop?
if (log.isDebugEnabled()) {
log.debug("next(), jump key is null, stopping");
@@ -1537,7 +1602,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
setTopKey(null);
return;
}
-
+
if (log.isDebugEnabled()) {
if (jumpKey != null) {
log.debug("next(), jumpKey: " + jumpKey);
@@ -1545,31 +1610,33 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
log.debug("jumpKey is null");
}
}
-
+
boolean same = false;
if (jumpKey != null && topKey != null) {
// check that the uid's are not the same
same = getIndexKeyUid(jumpKey).equals(getEventKeyUid(topKey));
if (log.isDebugEnabled()) {
- log.debug("jumpKeyUid: " + getIndexKeyUid(jumpKey) + " topKeyUid: " + getEventKeyUid(topKey));
+ log.debug(
+ "jumpKeyUid: " + getIndexKeyUid(jumpKey) + " topKeyUid: " + getEventKeyUid(topKey));
}
}
-
+
if (log.isDebugEnabled()) {
log.debug("previousJumpKey: " + previousJumpKey);
log.debug("current JumpKey: " + jumpKey);
}
-
+
if (jumpKey != null && !this.overallRange.contains(jumpKey)) {
if (log.isDebugEnabled()) {
- log.debug("jumpKey is outside of range, that means the next key is out of range, stopping");
+ log.debug(
+ "jumpKey is outside of range, that means the next key is out of range, stopping");
log.debug("jumpKey: " + jumpKey + " overallRange.endKey: " + overallRange.getEndKey());
}
// stop
setTopKey(null);
return;
}
-
+
boolean previousSame = false;
if (previousJumpKey != null && jumpKey != null) {
previousSame = previousJumpKey.equals(jumpKey);
@@ -1580,7 +1647,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
previousJumpKey = jumpKey;
ok = jump(jumpKey); // attempt to jump everybody forward to this row and uid.
// tryJump = false;
-
+
// now test the tree state.
if (testTreeState()) {
Key tempKey = root.getTopKey();
@@ -1591,7 +1658,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
continue;
}
}
-
+
if (root.getTopKey().equals(tempKey)) {
// it's valid set nextKey and make sure it's not the same as topKey.
if (log.isDebugEnabled()) {
@@ -1600,10 +1667,10 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
} else {
log.debug("next, this.root.getTopKey() is null");
}
-
+
if (topKey != null) {
log.debug("topKey->" + topKey);
-
+
} else {
log.debug("topKey is null");
}
@@ -1615,11 +1682,11 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
}
}
-
+
// --------------------------------------
// Regular next block
} else {
-
+
reHeapPriorityQueue(this.root);
BooleanLogicTreeNode node;
@@ -1628,13 +1695,13 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
if (!node.isDone() && node.hasTop()) {
break;
}
-
+
if (positives.isEmpty()) {
setTopKey(null);
return;
}
}
-
+
if (log.isDebugEnabled()) {
if (jumpKey == null) {
log.debug("no jump, jumpKey is null");
@@ -1648,7 +1715,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
node.next();
resetNegatives();
-
+
if (!node.hasTop()) {
// it may be part of an or, so it could be ok.
node.setValid(false);
@@ -1665,7 +1732,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
finished = true;
return;
}
-
+
} else {
setTopKey(this.root.getTopKey());
return;
@@ -1673,12 +1740,12 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
}
} else {
-
+
if (overallRange.contains(node.getTopKey())) {
// the node had something so push it back into priority queue
positives.add(node);
}
-
+
// now test the tree state.
if (testTreeState()) {
Key tempKey = root.getTopKey();
@@ -1689,7 +1756,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
continue;
}
}
-
+
if (root.getTopKey().equals(tempKey)) {
// it's valid set nextKey and make sure it's not the same as topKey.
if (log.isDebugEnabled()) {
@@ -1698,10 +1765,10 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
} else {
log.debug("next, this.root.getTopKey() is null");
}
-
+
if (topKey != null) {
log.debug("topKey->" + topKey);
-
+
} else {
log.debug("topKey is null");
}
@@ -1724,9 +1791,9 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
}
}
-
+
}
-
+
// is the priority queue empty?
if (positives.isEmpty()) {
finished = true;
@@ -1735,7 +1802,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
}
}
-
+
/*
* create a range for the given row of the
*/
@@ -1745,17 +1812,19 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
Text rowID = k.getRow();
Text colFam = k.getColumnFamily();
-
+
for (BooleanLogicTreeNode neg : negatives) {
- Key startKey = new Key(rowID, neg.getFieldName(), new Text(neg.getFieldValue() + "\0" + colFam));
- Key endKey = new Key(rowID, neg.getFieldName(), new Text(neg.getFieldValue() + "\0" + colFam + "\1"));
+ Key startKey =
+ new Key(rowID, neg.getFieldName(), new Text(neg.getFieldValue() + "\0" + colFam));
+ Key endKey =
+ new Key(rowID, neg.getFieldName(), new Text(neg.getFieldValue() + "\0" + colFam + "\1"));
Range range = new Range(startKey, true, endKey, false);
-
+
if (log.isDebugEnabled()) {
log.debug("range: " + range);
}
neg.seek(range, EMPTY_COL_FAMS, false);
-
+
if (neg.hasTop()) {
neg.setValid(false);
}
@@ -1768,8 +1837,10 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
}
}
-
- public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
+
+ @Override
+ public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive)
+ throws IOException {
this.overallRange = range;
if (log.isDebugEnabled()) {
log.debug("seek, overallRange: " + overallRange);
@@ -1778,11 +1849,11 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
// NOTE: All of our iterators exist in the leaves.
topKey = null;
root.setTopKey(null);
-
+
// set up the range iterators for the given seek range.
// these should exist in the positives as OR iterators, but need special setup.
setupRangerators(range);
-
+
// don't take this out, if you jump rows on the tablet you could have
// pulled nodes out of the positives priority queue. On a call to seek
// it is usually jumping rows, so everything needs to become possibly
@@ -1799,11 +1870,11 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
log.debug("leaf: " + node.getContents() + " topKey: " + tk);
}
}
-
+
// Now that all nodes have been seek'd recreate the priorityQueue to sort them properly.
splitLeaves(this.root);
resetNegatives();
-
+
// test Tree, if it's not valid, call next
if (testTreeState() && overallRange.contains(root.getTopKey())) {
if (!negatives.isEmpty()) {
@@ -1813,9 +1884,10 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
next();
}
}
-
+
if (log.isDebugEnabled()) {
- log.debug("overallRange " + overallRange + " topKey " + this.root.getTopKey() + " contains " + overallRange.contains(this.root.getTopKey()));
+ log.debug("overallRange " + overallRange + " topKey " + this.root.getTopKey() + " contains "
+ + overallRange.contains(this.root.getTopKey()));
}
if (overallRange.contains(this.root.getTopKey()) && this.root.isValid()) {
@@ -1828,14 +1900,14 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
// seek failed in the logic test, but there may be other possible
// values which satisfy the logic tree. Make sure our iterators aren't
// all null, and then call next.
-
+
// if(!root.hasTop()){
if (log.isDebugEnabled()) {
log.debug("seek, testTreeState is false, HEAD(root) does not have top");
}
// check nodes in positives to see if they're all null/outside range
// or if nothing percolated up to root yet.
- List<BooleanLogicTreeNode> removals = new ArrayList<BooleanLogicTreeNode>();
+ List<BooleanLogicTreeNode> removals = new ArrayList<>();
for (BooleanLogicTreeNode node : positives) {
if (!node.hasTop() || !overallRange.contains(node.getTopKey())) {
removals.add(node);
@@ -1848,29 +1920,31 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
return;
}
}
-
+
private int compare(Key k1, Key k2) {
if (k1 != null && k2 != null) {
return k1.compareTo(k2);
} else if (k1 == null && k2 == null) {
return 0;
- } else if (k1 == null) { // in this case, null is considered bigger b/c it's closer to the end of the table.
+ } else if (k1 == null) { // in this case, null is considered bigger b/c it's closer to the end
+ // of the table.
return 1;
} else {
return -1;
}
}
-
+
private void setupRangerators(Range range) throws IOException {
if (rangerators == null || rangerators.isEmpty()) {
return;
}
for (BooleanLogicTreeNode node : rangerators) {
- Set<String> fValues = new HashSet<String>();
+ Set<String> fValues = new HashSet<>();
OrIterator orIter = new OrIterator();
SortedKeyValueIterator<Key,Value> siter = sourceIterator.deepCopy(env);
// create UniqFieldNameValueIterator to find uniq field names values
- UniqFieldNameValueIterator uniq = new UniqFieldNameValueIterator(node.getFieldName(), node.getLowerBound(), node.getUpperBound());
+ UniqFieldNameValueIterator uniq = new UniqFieldNameValueIterator(node.getFieldName(),
+ node.getLowerBound(), node.getUpperBound());
uniq.setSource(siter);
uniq.seek(range, EMPTY_COL_FAMS, false);
while (uniq.hasTop()) {
@@ -1891,18 +1965,19 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
}
node.setUserObject(orIter);
}
-
+
}
-
- /* *************************************************************************
- * Inner classes
+
+ /*
+ * ************************************************************************* Inner classes
*/
public class BooleanLogicTreeNodeComparator implements Comparator<Object> {
-
+
+ @Override
public int compare(Object o1, Object o2) {
BooleanLogicTreeNode n1 = (BooleanLogicTreeNode) o1;
BooleanLogicTreeNode n2 = (BooleanLogicTreeNode) o2;
-
+
Key k1 = n1.getTopKey();
Key k2 = n2.getTopKey();
if (log.isDebugEnabled()) {
@@ -1917,7 +1992,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
log.debug("BooleanLogicTreeNodeComparator \tt1: " + t1 + " t2: " + t2);
}
// return t1.compareTo(t2);
-
+
if (k1 != null && k2 != null) {
return k1.compareTo(k2);
} else if (k1 == null && k2 == null) {
@@ -1927,15 +2002,18 @@ public class BooleanLogicIterator implements SortedKeyValueIterator<Key,Value>,
} else {
return -1;
}
-
+
}
}
-
+
+ @Override
public IteratorOptions describeOptions() {
- return new IteratorOptions(getClass().getSimpleName(), "evaluates event objects against an expression", Collections.singletonMap(QUERY_OPTION,
- "query expression"), null);
+ return new IteratorOptions(getClass().getSimpleName(),
+ "evaluates event objects against an expression",
+ Collections.singletonMap(QUERY_OPTION, "query expression"), null);
}
-
+
+ @Override
public boolean validateOptions(Map<String,String> options) {
if (!options.containsKey(QUERY_OPTION)) {
return false;
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/DefaultIteratorEnvironment.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/DefaultIteratorEnvironment.java
index 6783efe..f83de46 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/DefaultIteratorEnvironment.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/DefaultIteratorEnvironment.java
@@ -16,31 +16,33 @@
*/
package org.apache.accumulo.examples.wikisearch.iterator;
-import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import java.io.IOException;
+
import org.apache.accumulo.core.client.sample.SamplerConfiguration;
+import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import org.apache.accumulo.core.conf.DefaultConfiguration;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.IteratorEnvironment;
+import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
import org.apache.accumulo.core.iterators.system.MapFileIterator;
+import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.core.util.CachedConfiguration;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
-
-import java.io.IOException;
public class DefaultIteratorEnvironment implements IteratorEnvironment {
AccumuloConfiguration conf;
public DefaultIteratorEnvironment() {
- this.conf = AccumuloConfiguration.getDefaultConfiguration();
+ this.conf = DefaultConfiguration.getInstance();
}
@Override
- public SortedKeyValueIterator<Key,Value> reserveMapFileReader(String mapFileName) throws IOException {
+ public SortedKeyValueIterator<Key,Value> reserveMapFileReader(String mapFileName)
+ throws IOException {
Configuration conf = CachedConfiguration.getInstance();
FileSystem fs = FileSystem.get(conf);
return new MapFileIterator(this.conf, fs, mapFileName, conf);
@@ -85,4 +87,9 @@ public class DefaultIteratorEnvironment implements IteratorEnvironment {
public IteratorEnvironment cloneWithSamplingEnabled() {
throw new UnsupportedOperationException();
}
+
+ @Override
+ public boolean isUserCompaction() {
+ return false;
+ }
}
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/EvaluatingIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/EvaluatingIterator.java
index b2a0c83..5ade156 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/EvaluatingIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/EvaluatingIterator.java
@@ -33,69 +33,70 @@ import org.apache.accumulo.examples.wikisearch.parser.EventFields.FieldValue;
import org.apache.commons.collections.map.LRUMap;
import org.apache.hadoop.io.Text;
-
public class EvaluatingIterator extends AbstractEvaluatingIterator {
-
+
public static final String NULL_BYTE_STRING = "\u0000";
LRUMap visibilityMap = new LRUMap();
-
+
public EvaluatingIterator() {
super();
}
-
+
public EvaluatingIterator(AbstractEvaluatingIterator other, IteratorEnvironment env) {
super(other, env);
}
-
+
+ @Override
public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
return new EvaluatingIterator(this, env);
}
-
+
@Override
public PartialKey getKeyComparator() {
return PartialKey.ROW_COLFAM;
}
-
+
@Override
public Key getReturnKey(Key k) {
- // If we were using column visibility, then we would get the merged visibility here and use it in the key.
+ // If we were using column visibility, then we would get the merged visibility here and use it
+ // in the key.
// Remove the COLQ from the key and use the combined visibility
- Key r = new Key(k.getRowData().getBackingArray(), k.getColumnFamilyData().getBackingArray(), NULL_BYTE, k.getColumnVisibility().getBytes(),
- k.getTimestamp(), k.isDeleted(), false);
+ Key r = new Key(k.getRowData().getBackingArray(), k.getColumnFamilyData().getBackingArray(),
+ NULL_BYTE, k.getColumnVisibility().getBytes(), k.getTimestamp(), k.isDeleted(), false);
return r;
}
-
+
@Override
public void fillMap(EventFields event, Key key, Value value) {
// If we were using column visibility, we would have to merge them here.
-
+
// Pull the datatype from the colf in case we need to do anything datatype specific.
// String colf = key.getColumnFamily().toString();
// String datatype = colf.substring(0, colf.indexOf(NULL_BYTE_STRING));
-
+
// For the partitioned table, the field name and field value are stored in the column qualifier
// separated by a \0.
String colq = key.getColumnQualifier().toString();// .toLowerCase();
int idx = colq.indexOf(NULL_BYTE_STRING);
String fieldName = colq.substring(0, idx);
String fieldValue = colq.substring(idx + 1);
-
+
event.put(fieldName, new FieldValue(getColumnVisibility(key), fieldValue.getBytes()));
}
/**
- * @param key
* @return The column visibility
*/
public ColumnVisibility getColumnVisibility(Key key) {
ColumnVisibility result = (ColumnVisibility) visibilityMap.get(key.getColumnVisibility());
- if (result != null)
+ if (result != null) {
return result;
+ }
result = new ColumnVisibility(key.getColumnVisibility().getBytes());
visibilityMap.put(key.getColumnVisibility(), result);
return result;
}
-
+
/**
* Don't accept this key if the colf starts with 'fi'
*/
@@ -105,11 +106,12 @@ public class EvaluatingIterator extends AbstractEvaluatingIterator {
Key copy = new Key(key.getRow(), new Text("fi\01"));
Collection<ByteSequence> columnFamilies = Collections.emptyList();
this.iterator.seek(new Range(copy, copy), columnFamilies, true);
- if (this.iterator.hasTop())
+ if (this.iterator.hasTop()) {
return isKeyAccepted(this.iterator.getTopKey());
+ }
return true;
}
return true;
}
-
+
}
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/OrIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/OrIterator.java
index 78c8576..53752c0 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/OrIterator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/OrIterator.java
@@ -34,13 +34,14 @@ import org.apache.hadoop.io.Text;
import org.apache.log4j.Logger;
/**
- * An iterator that handles "OR" query constructs on the server side. This code has been adapted/merged from Heap and Multi Iterators.
+ * An iterator that handles "OR" query constructs on the server side. This code has been
+ * adapted/merged from Heap and Multi Iterators.
*/
public class OrIterator implements SortedKeyValueIterator<Key,Value> {
-
+
private TermSource currentTerm;
private ArrayList<TermSource> sources;
- private PriorityQueue<TermSource> sorted = new PriorityQueue<TermSource>(5);
+ private PriorityQueue<TermSource> sorted = new PriorityQueue<>(5);
private static final Text nullText = new Text();
private Key topKey = null;
private Range overallRange;
@@ -48,9 +49,9 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
private boolean inclusive;
protected static final Logger log = Logger.getLogger(OrIterator.class);
private Text parentEndRow;
-
+
protected static class TermSource implements Comparable<TermSource> {
-
+
public SortedKeyValueIterator<Key,Value> iter;
public Text dataLocation;
public Text term;
@@ -58,40 +59,40 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
public Text fieldTerm;
public Key topKey;
public boolean atEnd;
-
+
public TermSource(TermSource other) {
this.iter = other.iter;
this.term = other.term;
this.dataLocation = other.dataLocation;
this.atEnd = other.atEnd;
}
-
+
public TermSource(SortedKeyValueIterator<Key,Value> iter, Text term) {
this.iter = iter;
this.term = term;
this.atEnd = false;
}
-
+
public TermSource(SortedKeyValueIterator<Key,Value> iter, Text dataLocation, Text term) {
this.iter = iter;
this.dataLocation = dataLocation;
this.term = term;
this.atEnd = false;
}
-
+
public void setNew() {
if (!this.atEnd && this.iter.hasTop()) {
this.topKey = this.iter.getTopKey();
-
+
if (log.isDebugEnabled()) {
log.debug("OI.TermSource.setNew TS.iter.topKey >>" + topKey + "<<");
}
-
+
if (this.term == null) {
this.docid = this.topKey.getColumnQualifier();
} else {
String cqString = this.topKey.getColumnQualifier().toString();
-
+
int idx = cqString.indexOf("\0");
this.fieldTerm = new Text(cqString.substring(0, idx));
this.docid = new Text(cqString.substring(idx + 1));
@@ -100,7 +101,7 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
if (log.isDebugEnabled()) {
log.debug("OI.TermSource.setNew Setting to null...");
}
-
+
// this.term = null;
// this.dataLocation = null;
this.topKey = null;
@@ -108,7 +109,8 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
this.docid = null;
}
}
-
+
+ @Override
public int compareTo(TermSource o) {
// NOTE: If your implementation can have more than one row in a tablet,
// you must compare row key here first, then column qualifier.
@@ -116,14 +118,14 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
// sorted after they have been determined to be valid.
// return this.docid.compareTo(o.docid);
// return this.topKey.compareTo(o.topKey);
-
+
// NOTE! We need to compare UID's, not Keys!
Key k1 = topKey;
Key k2 = o.topKey;
// return t1.compareTo(t2);
String uid1 = getUID(k1);
String uid2 = getUID(k2);
-
+
if (uid1 != null && uid2 != null) {
return uid1.compareTo(uid2);
} else if (uid1 == null && uid2 == null) {
@@ -133,107 +135,104 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
} else {
return -1;
}
-
+
}
-
+
@Override
public String toString() {
return "TermSource: " + this.dataLocation + " " + this.term;
}
-
+
public boolean hasTop() {
return this.topKey != null;
}
}
-
+
/**
* Returns the given key's row
- *
- * @param key
+ *
* @return The given key's row
*/
protected Text getPartition(Key key) {
return key.getRow();
}
-
+
/**
* Returns the given key's dataLocation
- *
- * @param key
+ *
* @return The given key's dataLocation
*/
protected Text getDataLocation(Key key) {
return key.getColumnFamily();
}
-
+
/**
* Returns the given key's term
- *
- * @param key
+ *
* @return The given key's term
*/
protected Text getTerm(Key key) {
int idx = 0;
String sKey = key.getColumnQualifier().toString();
-
+
idx = sKey.indexOf("\0");
return new Text(sKey.substring(0, idx));
}
-
+
/**
* Returns the given key's DocID
- *
- * @param key
+ *
* @return The given key's DocID
*/
protected Text getDocID(Key key) {
int idx = 0;
String sKey = key.getColumnQualifier().toString();
-
+
idx = sKey.indexOf("\0");
return new Text(sKey.substring(idx + 1));
}
-
+
/**
* Returns the given key's UID
- *
- * @param key
+ *
* @return The given key's UID
*/
static protected String getUID(Key key) {
try {
int idx = 0;
String sKey = key.getColumnQualifier().toString();
-
+
idx = sKey.indexOf("\0");
return sKey.substring(idx + 1);
} catch (Exception e) {
return null;
}
}
-
+
public OrIterator() {
- this.sources = new ArrayList<TermSource>();
+ this.sources = new ArrayList<>();
}
-
+
private OrIterator(OrIterator other, IteratorEnvironment env) {
- this.sources = new ArrayList<TermSource>();
-
+ this.sources = new ArrayList<>();
+
for (TermSource TS : other.sources) {
this.sources.add(new TermSource(TS.iter.deepCopy(env), TS.dataLocation, TS.term));
}
}
-
+
+ @Override
public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
return new OrIterator(this, env);
}
-
- public void addTerm(SortedKeyValueIterator<Key,Value> source, Text term, IteratorEnvironment env) {
+
+ public void addTerm(SortedKeyValueIterator<Key,Value> source, Text term,
+ IteratorEnvironment env) {
if (log.isDebugEnabled()) {
log.debug("OI.addTerm Added source w/o family");
log.debug("OI.addTerm term >>" + term + "<<");
}
-
+
// Don't deepcopy an iterator
if (term == null) {
this.sources.add(new TermSource(source, term));
@@ -241,13 +240,14 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
this.sources.add(new TermSource(source.deepCopy(env), term));
}
}
-
- public void addTerm(SortedKeyValueIterator<Key,Value> source, Text dataLocation, Text term, IteratorEnvironment env) {
+
+ public void addTerm(SortedKeyValueIterator<Key,Value> source, Text dataLocation, Text term,
+ IteratorEnvironment env) {
if (log.isDebugEnabled()) {
log.debug("OI.addTerm Added source ");
log.debug("OI.addTerm family >>" + dataLocation + "<< term >>" + term + "<<");
}
-
+
// Don't deepcopy an iterator
if (term == null) {
this.sources.add(new TermSource(source, dataLocation, term));
@@ -255,60 +255,65 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
this.sources.add(new TermSource(source.deepCopy(env), dataLocation, term));
}
}
-
+
/**
* Construct the topKey given the current <code>TermSource</code>
- *
- * @param TS
+ *
* @return The top Key for a given TermSource
*/
protected Key buildTopKey(TermSource TS) {
if ((TS == null) || (TS.topKey == null)) {
return null;
}
-
+
if (log.isDebugEnabled()) {
- log.debug("OI.buildTopKey New topKey >>" + new Key(TS.topKey.getRow(), TS.dataLocation, TS.docid) + "<<");
+ log.debug("OI.buildTopKey New topKey >>"
+ + new Key(TS.topKey.getRow(), TS.dataLocation, TS.docid) + "<<");
}
-
+
return new Key(TS.topKey.getRow(), TS.topKey.getColumnFamily(), TS.topKey.getColumnQualifier());
}
-
+
+ @Override
final public void next() throws IOException {
if (log.isDebugEnabled()) {
- log.debug("OI.next Enter: sorted.size = " + sorted.size() + " currentTerm = " + ((currentTerm == null) ? "null" : "not null"));
+ log.debug("OI.next Enter: sorted.size = " + sorted.size() + " currentTerm = "
+ + ((currentTerm == null) ? "null" : "not null"));
}
-
+
if (currentTerm == null) {
if (log.isDebugEnabled()) {
log.debug("OI.next currentTerm is NULL... returning");
}
-
+
topKey = null;
return;
}
-
+
// Advance currentTerm
currentTerm.iter.next();
-
+
advanceToMatch(currentTerm);
-
+
currentTerm.setNew();
-
+
// See if currentTerm is still valid, remove if not
if (log.isDebugEnabled()) {
- log.debug("OI.next Checks (correct = 0,0,0): " + ((currentTerm.topKey != null) ? "0," : "1,") + ((currentTerm.dataLocation != null) ? "0," : "1,")
- + ((currentTerm.term != null && currentTerm.fieldTerm != null) ? (currentTerm.term.compareTo(currentTerm.fieldTerm)) : "0"));
+ log.debug("OI.next Checks (correct = 0,0,0): " + ((currentTerm.topKey != null) ? "0," : "1,")
+ + ((currentTerm.dataLocation != null) ? "0," : "1,")
+ + ((currentTerm.term != null && currentTerm.fieldTerm != null)
+ ? (currentTerm.term.compareTo(currentTerm.fieldTerm)) : "0"));
}
-
- if (currentTerm.topKey == null || ((currentTerm.dataLocation != null) && (currentTerm.term.compareTo(currentTerm.fieldTerm) != 0))) {
+
+ if (currentTerm.topKey == null || ((currentTerm.dataLocation != null)
+ && (currentTerm.term.compareTo(currentTerm.fieldTerm) != 0))) {
if (log.isDebugEnabled()) {
log.debug("OI.next removing entry:" + currentTerm.term);
}
-
+
currentTerm = null;
}
-
+
// optimization.
// if size == 0, currentTerm is the only item left,
// OR there are no items left.
@@ -321,58 +326,62 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
// and get the current top item out.
currentTerm = sorted.poll();
}
-
+
if (log.isDebugEnabled()) {
log.debug("OI.next CurrentTerm is " + ((currentTerm == null) ? "null" : currentTerm));
}
-
+
topKey = buildTopKey(currentTerm);
-
+
if (hasTop()) {
if (overallRange != null && !overallRange.contains(topKey)) {
topKey = null;
}
}
}
-
- public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
-
+
+ @Override
+ public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive)
+ throws IOException {
+
overallRange = new Range(range);
if (log.isDebugEnabled()) {
log.debug("seek, overallRange: " + overallRange);
}
-
+
if (range.getEndKey() != null && range.getEndKey().getRow() != null) {
this.parentEndRow = range.getEndKey().getRow();
}
-
+
if (log.isDebugEnabled()) {
log.debug("OI.seek Entry - sources.size = " + sources.size());
- log.debug("OI.seek Entry - currentTerm = " + ((currentTerm == null) ? "false" : currentTerm.iter.getTopKey()));
- log.debug("OI.seek Entry - Key from Range = " + ((range == null) ? "false" : range.getStartKey()));
+ log.debug("OI.seek Entry - currentTerm = "
+ + ((currentTerm == null) ? "false" : currentTerm.iter.getTopKey()));
+ log.debug(
+ "OI.seek Entry - Key from Range = " + ((range == null) ? "false" : range.getStartKey()));
}
-
+
// If sources.size is 0, there is nothing to process, so just return.
if (sources.isEmpty()) {
currentTerm = null;
topKey = null;
return;
}
-
+
this.columnFamilies = columnFamilies;
this.inclusive = inclusive;
-
+
Range newRange = range;
Key sourceKey = null;
Key startKey = null;
-
+
if (range != null) {
startKey = range.getStartKey();
}
-
+
// Clear the PriorityQueue so that we can re-populate it.
sorted.clear();
-
+
TermSource TS = null;
Iterator<TermSource> iter = sources.iterator();
// For each term, seek forward.
@@ -380,66 +389,73 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
int counter = 1;
while (iter.hasNext()) {
TS = iter.next();
-
+
TS.atEnd = false;
-
+
if (sources.size() == 1) {
currentTerm = TS;
}
-
+
if (log.isDebugEnabled()) {
log.debug("OI.seek on TS >>" + TS + "<<");
log.debug("OI.seek seeking source >>" + counter + "<< ");
}
-
+
counter++;
-
+
newRange = range;
sourceKey = null;
-
+
if (startKey != null) {
// Construct the new key for the range
if (log.isDebugEnabled()) {
log.debug("OI.seek startKey >>" + startKey + "<<");
}
-
+
if (startKey.getColumnQualifier() != null) {
- sourceKey = new Key(startKey.getRow(), (TS.dataLocation == null) ? nullText : TS.dataLocation, new Text(((TS.term == null) ? "" : TS.term + "\0")
- + range.getStartKey().getColumnQualifier()));
+ sourceKey =
+ new Key(startKey.getRow(), (TS.dataLocation == null) ? nullText : TS.dataLocation,
+ new Text(((TS.term == null) ? "" : TS.term + "\0")
+ + range.getStartKey().getColumnQualifier()));
} else {
- sourceKey = new Key(startKey.getRow(), (TS.dataLocation == null) ? nullText : TS.dataLocation, (TS.term == null) ? nullText : TS.term);
+ sourceKey =
+ new Key(startKey.getRow(), (TS.dataLocation == null) ? nullText : TS.dataLocation,
+ (TS.term == null) ? nullText : TS.term);
}
-
+
if (log.isDebugEnabled()) {
log.debug("OI.seek Seeking to the key => " + sourceKey);
}
-
+
newRange = new Range(sourceKey, true, sourceKey.followingKey(PartialKey.ROW), false);
} else {
if (log.isDebugEnabled()) {
log.debug("OI.seek Using the range Seek() argument to seek => " + newRange);
}
}
-
+
TS.iter.seek(newRange, columnFamilies, inclusive);
-
+
TS.setNew();
-
+
// Make sure we're on a key with the correct dataLocation and term
advanceToMatch(TS);
-
+
TS.setNew();
-
+
if (log.isDebugEnabled()) {
log.debug("OI.seek sourceKey >>" + sourceKey + "<< ");
log.debug("OI.seek topKey >>" + ((TS.topKey == null) ? "false" : TS.topKey) + "<< ");
log.debug("OI.seek TS.fieldTerm == " + TS.fieldTerm);
-
- log.debug("OI.seek Checks (correct = 0,0,0 / 0,1,1): " + ((TS.topKey != null) ? "0," : "1,") + ((TS.dataLocation != null) ? "0," : "1,")
- + (((TS.term != null && TS.fieldTerm != null) && (TS.term.compareTo(TS.fieldTerm) != 0)) ? "0" : "1"));
+
+ log.debug("OI.seek Checks (correct = 0,0,0 / 0,1,1): " + ((TS.topKey != null) ? "0," : "1,")
+ + ((TS.dataLocation != null) ? "0," : "1,")
+ + (((TS.term != null && TS.fieldTerm != null) && (TS.term.compareTo(TS.fieldTerm) != 0))
+ ? "0" : "1"));
}
-
- if ((TS.topKey == null) || ((TS.dataLocation != null) && (TS.term.compareTo(TS.fieldTerm) != 0))) {
+
+ if ((TS.topKey == null)
+ || ((TS.dataLocation != null) && (TS.term.compareTo(TS.fieldTerm) != 0))) {
// log.debug("OI.seek Removing " + TS.term);
// iter.remove();
} // Optimization if we only have one element
@@ -451,7 +467,7 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
if (log.isDebugEnabled()) {
log.debug("OI.seek new topKey >>" + ((topKey == null) ? "false" : topKey) + "<< ");
}
-
+
// make sure it is in the range if we have one.
if (hasTop()) {
if (overallRange != null && !overallRange.contains(topKey)) {
@@ -464,25 +480,25 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
return;
}
}
-
+
// And set currentTerm = the next valid key/term.
currentTerm = sorted.poll();
-
+
if (log.isDebugEnabled()) {
log.debug("OI.seek currentTerm = " + currentTerm);
}
-
+
topKey = buildTopKey(currentTerm);
if (topKey == null) {
if (log.isDebugEnabled()) {
log.debug("OI.seek() topKey is null");
}
}
-
+
if (log.isDebugEnabled()) {
log.debug("OI.seek new topKey >>" + ((topKey == null) ? "false" : topKey) + "<< ");
}
-
+
if (hasTop()) {
if (overallRange != null && !overallRange.contains(topKey)) {
if (log.isDebugEnabled()) {
@@ -491,44 +507,49 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
topKey = null;
}
}
-
+
}
-
+
+ @Override
final public Key getTopKey() {
if (log.isDebugEnabled()) {
log.debug("OI.getTopKey key >>" + topKey);
}
-
+
return topKey;
}
-
+
+ @Override
final public Value getTopValue() {
if (log.isDebugEnabled()) {
log.debug("OI.getTopValue key >>" + currentTerm.iter.getTopValue());
}
-
+
return currentTerm.iter.getTopValue();
}
-
+
+ @Override
final public boolean hasTop() {
if (log.isDebugEnabled()) {
log.debug("OI.hasTop = " + ((topKey == null) ? "false" : "true"));
}
-
+
return topKey != null;
}
-
- public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
+
+ @Override
+ public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options,
+ IteratorEnvironment env) throws IOException {
throw new UnsupportedOperationException();
}
-
+
/**
- * Ensures that the current <code>TermSource</code> is pointing to a key with the correct <code>dataLocation</code> and <code>term</code> or sets
- * <code>topKey</code> to null if there is no such key remaining.
- *
+ * Ensures that the current <code>TermSource</code> is pointing to a key with the correct
+ * <code>dataLocation</code> and <code>term</code> or sets <code>topKey</code> to null if there is
+ * no such key remaining.
+ *
* @param TS
* The <code>TermSource</code> to advance
- * @throws IOException
*/
private void advanceToMatch(TermSource TS) throws IOException {
boolean matched = false;
@@ -537,25 +558,26 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
TS.topKey = null;
return;
}
-
+
Key iterTopKey = TS.iter.getTopKey();
-
+
if (log.isDebugEnabled()) {
log.debug("OI.advanceToMatch current topKey = " + iterTopKey);
}
-
+
// we should compare the row to the end of the range
if (overallRange.getEndKey() != null) {
-
+
if (overallRange != null && !overallRange.contains(TS.iter.getTopKey())) {
if (log.isDebugEnabled()) {
- log.debug("overallRange: " + overallRange + " does not contain TS.iter.topKey: " + TS.iter.getTopKey());
+ log.debug("overallRange: " + overallRange + " does not contain TS.iter.topKey: "
+ + TS.iter.getTopKey());
log.debug("OI.advanceToMatch at the end, returning");
}
-
+
TS.atEnd = true;
TS.topKey = null;
-
+
return;
} else {
if (log.isDebugEnabled()) {
@@ -567,116 +589,118 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
log.debug("OI.advanceToMatch overallRange.getEndKey() == null");
}
}
-
+
// Advance to the correct dataLocation
if (log.isDebugEnabled()) {
log.debug("Comparing dataLocations.");
- log.debug("OI.advanceToMatch dataLocationCompare: " + getDataLocation(iterTopKey) + " == " + TS.dataLocation);
+ log.debug("OI.advanceToMatch dataLocationCompare: " + getDataLocation(iterTopKey) + " == "
+ + TS.dataLocation);
}
-
+
int dataLocationCompare = getDataLocation(iterTopKey).compareTo(TS.dataLocation);
-
+
if (log.isDebugEnabled()) {
log.debug("OI.advanceToMatch dataLocationCompare = " + dataLocationCompare);
}
-
+
// Make sure we're at a row for this dataLocation
if (dataLocationCompare < 0) {
if (log.isDebugEnabled()) {
log.debug("OI.advanceToMatch seek to desired dataLocation");
}
-
+
Key seekKey = new Key(iterTopKey.getRow(), TS.dataLocation, nullText);
-
+
if (log.isDebugEnabled()) {
log.debug("OI.advanceToMatch seeking to => " + seekKey);
}
-
+
TS.iter.seek(new Range(seekKey, true, null, false), columnFamilies, inclusive);
-
+
continue;
} else if (dataLocationCompare > 0) {
if (log.isDebugEnabled()) {
log.debug("OI.advanceToMatch advanced beyond desired dataLocation, seek to next row");
}
-
+
// Gone past the current dataLocation, seek to the next row
Key seekKey = iterTopKey.followingKey(PartialKey.ROW);
-
+
if (log.isDebugEnabled()) {
log.debug("OI.advanceToMatch seeking to => " + seekKey);
}
-
+
TS.iter.seek(new Range(seekKey, true, null, false), columnFamilies, inclusive);
-
+
continue;
}
-
+
// Advance to the correct term
if (log.isDebugEnabled()) {
log.debug("OI.advanceToMatch termCompare: " + getTerm(iterTopKey) + " == " + TS.term);
}
-
+
int termCompare = getTerm(iterTopKey).compareTo(TS.term);
-
+
if (log.isDebugEnabled()) {
log.debug("OI.advanceToMatch termCompare = " + termCompare);
}
-
+
// Make sure we're at a row for this term
if (termCompare < 0) {
if (log.isDebugEnabled()) {
log.debug("OI.advanceToMatch seek to desired term");
}
-
+
Key seekKey = new Key(iterTopKey.getRow(), iterTopKey.getColumnFamily(), TS.term);
-
+
if (log.isDebugEnabled()) {
log.debug("OI.advanceToMatch seeking to => " + seekKey);
}
-
+
TS.iter.seek(new Range(seekKey, true, null, false), columnFamilies, inclusive);
-
+
continue;
} else if (termCompare > 0) {
if (log.isDebugEnabled()) {
log.debug("OI.advanceToMatch advanced beyond desired term, seek to next row");
}
-
+
// Gone past the current term, seek to the next row
Key seekKey = iterTopKey.followingKey(PartialKey.ROW);
-
+
if (log.isDebugEnabled()) {
log.debug("OI.advanceToMatch seeking to => " + seekKey);
}
-
+
TS.iter.seek(new Range(seekKey, true, null, false), columnFamilies, inclusive);
continue;
}
-
+
// If we made it here, we found a match
matched = true;
}
}
-
+
public boolean jump(Key jumpKey) throws IOException {
if (log.isDebugEnabled()) {
log.debug("OR jump: " + jumpKey);
printTopKeysForTermSources();
}
-
+
// is the jumpKey outside my overall range?
if (parentEndRow != null && parentEndRow.compareTo(jumpKey.getRow()) < 0) {
// can't go there.
if (log.isDebugEnabled()) {
- log.debug("jumpRow: " + jumpKey.getRow() + " is greater than my parentEndRow: " + parentEndRow);
+ log.debug(
+ "jumpRow: " + jumpKey.getRow() + " is greater than my parentEndRow: " + parentEndRow);
}
return false;
}
-
+
// Clear the PriorityQueue so that we can re-populate it.
sorted.clear();
-
+
// check each term source and jump it if necessary.
for (TermSource ts : sources) {
int comp;
@@ -684,7 +708,8 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
if (log.isDebugEnabled()) {
log.debug("jump called, but ts.topKey is null, this one needs to move to next row.");
}
- Key startKey = new Key(jumpKey.getRow(), ts.dataLocation, new Text(ts.term + "\0" + jumpKey.getColumnFamily()));
+ Key startKey = new Key(jumpKey.getRow(), ts.dataLocation,
+ new Text(ts.term + "\0" + jumpKey.getColumnFamily()));
Key endKey = null;
if (parentEndRow != null) {
endKey = new Key(parentEndRow);
@@ -694,14 +719,15 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
ts.setNew();
advanceToMatch(ts);
ts.setNew();
-
+
} else {
// check row, then uid
comp = this.topKey.getRow().compareTo(jumpKey.getRow());
if (comp > 0) {
if (log.isDebugEnabled()) {
log.debug("jump, our row is ahead of jumpKey.");
- log.debug("jumpRow: " + jumpKey.getRow() + " myRow: " + topKey.getRow() + " parentEndRow" + parentEndRow);
+ log.debug("jumpRow: " + jumpKey.getRow() + " myRow: " + topKey.getRow()
+ + " parentEndRow" + parentEndRow);
}
if (ts.hasTop()) {
sorted.add(ts);
@@ -725,21 +751,21 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
// need to check uid
String myUid = getUID(ts.topKey);
String jumpUid = getUID(jumpKey);
-
+
if (log.isDebugEnabled()) {
if (myUid == null) {
log.debug("myUid is null");
} else {
log.debug("myUid: " + myUid);
}
-
+
if (jumpUid == null) {
log.debug("jumpUid is null");
} else {
log.debug("jumpUid: " + jumpUid);
}
}
-
+
int ucomp = myUid.compareTo(jumpUid);
if (ucomp < 0) {
// need to move forward
@@ -761,7 +787,7 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
ts.setNew();
advanceToMatch(ts);
ts.setNew();
-
+
if (log.isDebugEnabled()) {
if (ts.iter.hasTop()) {
log.debug("ts.iter.topkey: " + ts.iter.getTopKey());
@@ -769,10 +795,10 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
log.debug("ts.iter.topKey is null");
}
}
- }// else do nothing, we're ahead of jump key
+ } // else do nothing, we're ahead of jump key
}
}
-
+
// ts should have moved, validate this particular ts.
if (ts.hasTop()) {
if (overallRange != null) {
@@ -790,14 +816,14 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
if (log.isDebugEnabled()) {
log.debug("OI.jump currentTerm = " + currentTerm);
}
-
+
topKey = buildTopKey(currentTerm);
if (log.isDebugEnabled()) {
log.debug("OI.jump new topKey >>" + ((topKey == null) ? "false" : topKey) + "<< ");
}
return hasTop();
}
-
+
private void printTopKeysForTermSources() {
if (log.isDebugEnabled()) {
for (TermSource ts : sources) {
@@ -811,7 +837,7 @@ public class OrIterator implements SortedKeyValueIterator<Key,Value> {
log.debug("ts is null");
}
}
-
+
if (topKey != null) {
log.debug("OrIterator current topKey: " + topKey);
} else {
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/AbstractQueryLogic.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/AbstractQueryLogic.java
index 5c7c20c..75135be 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/AbstractQueryLogic.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/AbstractQueryLogic.java
@@ -69,112 +69,115 @@ import com.google.common.collect.Multimap;
/**
* <pre>
* <h2>Overview</h2>
- * Query implementation that works with the JEXL grammar. This
+ * Query implementation that works with the JEXL grammar. This
* uses the metadata, global index, and partitioned table to return
* results based on the query. Example queries:
- *
+ *
* <b>Single Term Query</b>
* 'foo' - looks in global index for foo, and if any entries are found, then the query
* is rewritten to be field1 == 'foo' or field2 == 'foo', etc. This is then passed
* down the optimized query path which uses the intersecting iterators on the partitioned
* table.
- *
- * <b>Boolean expression</b>
+ *
+ * <b>Boolean expression</b>
* field == 'foo' - For fielded queries, those that contain a field, an operator, and a literal (string or number),
* the query is parsed and the set of eventFields in the query that are indexed is determined by
* querying the metadata table. Depending on the conjunctions in the query (or, and, not) and the
* eventFields that are indexed, the query may be sent down the optimized path or the full scan path.
- *
+ *
* We are not supporting all of the operators that JEXL supports at this time. We are supporting the following operators:
- *
+ *
* ==, !=, >, ≥, <, ≤, =~, and !~
- *
+ *
* Custom functions can be created and registered with the Jexl engine. The functions can be used in the queries in conjunction
* with other supported operators. A sample function has been created, called between, and is bound to the 'f' namespace. An
* example using this function is : "f:between(LATITUDE,60.0, 70.0)"
- *
+ *
* <h2>Constraints on Query Structure</h2>
* Queries that are sent to this class need to be formatted such that there is a space on either side of the operator. We are
- * rewriting the query in some cases and the current implementation is expecting a space on either side of the operator. If
+ * rewriting the query in some cases and the current implementation is expecting a space on either side of the operator. If
* an error occurs in the evaluation we are skipping the event.
- *
+ *
* <h2>Notes on Optimization</h2>
* Queries that meet any of the following criteria will perform a full scan of the events in the partitioned table:
- *
+ *
* 1. An 'or' conjunction exists in the query but not all of the terms are indexed.
* 2. No indexed terms exist in the query
* 3. An unsupported operator exists in the query
- *
+ *
* </pre>
- *
+ *
*/
public abstract class AbstractQueryLogic {
-
+
protected static Logger log = Logger.getLogger(AbstractQueryLogic.class);
-
+
/**
* Set of datatypes to limit the query to.
*/
public static final String DATATYPE_FILTER_SET = "datatype.filter.set";
-
+
private static class DoNotPerformOptimizedQueryException extends Exception {
private static final long serialVersionUID = 1L;
}
-
+
/**
- * Object that is used to hold ranges found in the index. Subclasses may compute the final range set in various ways.
+ * Object that is used to hold ranges found in the index. Subclasses may compute the final range
+ * set in various ways.
*/
public static abstract class IndexRanges {
-
+
private Map<String,String> indexValuesToOriginalValues = null;
private Multimap<String,String> fieldNamesAndValues = HashMultimap.create();
- private Map<String,Long> termCardinality = new HashMap<String,Long>();
- protected Map<String,TreeSet<Range>> ranges = new HashMap<String,TreeSet<Range>>();
-
+ private Map<String,Long> termCardinality = new HashMap<>();
+ protected Map<String,TreeSet<Range>> ranges = new HashMap<>();
+
public Multimap<String,String> getFieldNamesAndValues() {
return fieldNamesAndValues;
}
-
+
public void setFieldNamesAndValues(Multimap<String,String> fieldNamesAndValues) {
this.fieldNamesAndValues = fieldNamesAndValues;
}
-
+
public final Map<String,Long> getTermCardinality() {
return termCardinality;
}
-
+
public Map<String,String> getIndexValuesToOriginalValues() {
return indexValuesToOriginalValues;
}
-
+
public void setIndexValuesToOriginalValues(Map<String,String> indexValuesToOriginalValues) {
this.indexValuesToOriginalValues = indexValuesToOriginalValues;
}
-
+
public abstract void add(String term, Range r);
-
+
public abstract Set<Range> getRanges();
}
-
+
/**
- * Object that computes the ranges by unioning all of the ranges for all of the terms together. In the case where ranges overlap, the largest range is used.
+ * Object that computes the ranges by unioning all of the ranges for all of the terms together. In
+ * the case where ranges overlap, the largest range is used.
*/
public static class UnionIndexRanges extends IndexRanges {
-
+
public static String DEFAULT_KEY = "default";
-
+
public UnionIndexRanges() {
this.ranges.put(DEFAULT_KEY, new TreeSet<Range>());
}
-
+
+ @Override
public Set<Range> getRanges() {
// So the set of ranges is ordered. It *should* be the case that
// ranges with partition ids will sort before ranges that point to
// a specific event. Populate a new set of ranges but don't add a
// range for an event where that range is contained in a range already
// added.
- Set<Text> shardsAdded = new HashSet<Text>();
- Set<Range> returnSet = new HashSet<Range>();
+ Set<Text> shardsAdded = new HashSet<>();
+ Set<Range> returnSet = new HashSet<>();
for (Range r : ranges.get(DEFAULT_KEY)) {
if (!shardsAdded.contains(r.getStartKey().getRow())) {
// Only add ranges with a start key for the entire partition.
@@ -184,18 +187,20 @@ public abstract class AbstractQueryLogic {
returnSet.add(r);
} else {
// if (log.isTraceEnabled())
- log.info("Skipping event specific range: " + r.toString() + " because range has already been added: "
+ log.info("Skipping event specific range: " + r.toString()
+ + " because range has already been added: "
+ shardsAdded.contains(r.getStartKey().getRow()));
}
}
return returnSet;
}
-
+
+ @Override
public void add(String term, Range r) {
ranges.get(DEFAULT_KEY).add(r);
}
}
-
+
private String metadataTableName;
private String indexTableName;
private String reverseIndexTableName;
@@ -207,35 +212,31 @@ public abstract class AbstractQueryLogic {
private Kryo kryo = new Kryo();
private EventFields eventFields = new EventFields();
private List<String> unevaluatedFields = null;
- private Map<Class<? extends Normalizer>,Normalizer> normalizerCacheMap = new HashMap<Class<? extends Normalizer>,Normalizer>();
+ private Map<Class<? extends Normalizer>,Normalizer> normalizerCacheMap = new HashMap<>();
private static final String NULL_BYTE = "\u0000";
-
+
public AbstractQueryLogic() {
super();
EventFields.initializeKryo(kryo);
}
-
+
/**
* Queries metadata table to determine which terms are indexed.
- *
- * @param c
- * @param auths
- * @param queryLiterals
+ *
* @param datatypes
* - optional list of types
* @return map of indexed field names to types to normalizers used in this date range
- * @throws TableNotFoundException
- * @throws IllegalAccessException
- * @throws InstantiationException
*/
- protected Map<String,Multimap<String,Class<? extends Normalizer>>> findIndexedTerms(Connector c, Authorizations auths, Set<String> queryLiterals,
- Set<String> datatypes) throws TableNotFoundException, InstantiationException, IllegalAccessException {
-
- Map<String,Multimap<String,Class<? extends Normalizer>>> results = new HashMap<String,Multimap<String,Class<? extends Normalizer>>>();
-
+ protected Map<String,Multimap<String,Class<? extends Normalizer>>> findIndexedTerms(Connector c,
+ Authorizations auths, Set<String> queryLiterals, Set<String> datatypes)
+ throws TableNotFoundException, InstantiationException, IllegalAccessException {
+
+ Map<String,Multimap<String,Class<? extends Normalizer>>> results = new HashMap<>();
+
for (String literal : queryLiterals) {
- if (log.isDebugEnabled())
+ if (log.isDebugEnabled()) {
log.debug("Querying " + this.getMetadataTableName() + " table for " + literal);
+ }
Range range = new Range(literal.toUpperCase());
Scanner scanner = c.createScanner(this.getMetadataTableName(), auths);
scanner.setRange(range);
@@ -252,13 +253,16 @@ public abstract class AbstractQueryLogic {
if (idx != -1) {
String type = colq.substring(0, idx);
// If types are specified and this type is not in the list then skip it.
- if (null != datatypes && !datatypes.contains(type))
+ if (null != datatypes && !datatypes.contains(type)) {
continue;
+ }
try {
@SuppressWarnings("unchecked")
- Class<? extends Normalizer> clazz = (Class<? extends Normalizer>) Class.forName(colq.substring(idx + 1));
- if (!normalizerCacheMap.containsKey(clazz))
+ Class<? extends Normalizer> clazz =
+ (Class<? extends Normalizer>) Class.forName(colq.substring(idx + 1));
+ if (!normalizerCacheMap.containsKey(clazz)) {
normalizerCacheMap.put(clazz, clazz.newInstance());
+ }
results.get(literal).put(type, clazz);
} catch (ClassNotFoundException e) {
log.error("Unable to find normalizer on class path: " + colq.substring(idx + 1), e);
@@ -272,26 +276,25 @@ public abstract class AbstractQueryLogic {
}
}
}
- if (log.isDebugEnabled())
+ if (log.isDebugEnabled()) {
log.debug("METADATA RESULTS: " + results.toString());
+ }
return results;
}
-
+
/**
* Performs a lookup in the global index for a single non-fielded term.
- *
- * @param c
- * @param auths
- * @param value
+ *
* @param datatypes
* - optional list of types
* @return ranges that fit into the date range.
*/
- protected abstract IndexRanges getTermIndexInformation(Connector c, Authorizations auths, String value, Set<String> datatypes) throws TableNotFoundException;
-
+ protected abstract IndexRanges getTermIndexInformation(Connector c, Authorizations auths,
+ String value, Set<String> datatypes) throws TableNotFoundException;
+
/**
* Performs a lookup in the global index / reverse index and returns a RangeCalculator
- *
+ *
* @param c
* Accumulo connection
* @param auths
@@ -300,107 +303,107 @@ public abstract class AbstractQueryLogic {
* multimap of indexed field name and Normalizers used
* @param terms
* multimap of field name and QueryTerm object
- * @param indexTableName
- * @param reverseIndexTableName
* @param queryString
* original query string
- * @param queryThreads
* @param datatypes
* - optional list of types
* @return range calculator
- * @throws TableNotFoundException
*/
- protected abstract RangeCalculator getTermIndexInformation(Connector c, Authorizations auths, Multimap<String,Normalizer> indexedTerms,
- Multimap<String,QueryTerm> terms, String indexTableName, String reverseIndexTableName, String queryString, int queryThreads, Set<String> datatypes)
+ protected abstract RangeCalculator getTermIndexInformation(Connector c, Authorizations auths,
+ Multimap<String,Normalizer> indexedTerms, Multimap<String,QueryTerm> terms,
+ String indexTableName, String reverseIndexTableName, String queryString, int queryThreads,
+ Set<String> datatypes)
throws TableNotFoundException, org.apache.commons.jexl2.parser.ParseException;
-
- protected abstract Collection<Range> getFullScanRange(Date begin, Date end, Multimap<String,QueryTerm> terms);
-
+
+ protected abstract Collection<Range> getFullScanRange(Date begin, Date end,
+ Multimap<String,QueryTerm> terms);
+
public String getMetadataTableName() {
return metadataTableName;
}
-
+
public String getIndexTableName() {
return indexTableName;
}
-
+
public String getTableName() {
return tableName;
}
-
+
public void setMetadataTableName(String metadataTableName) {
this.metadataTableName = metadataTableName;
}
-
+
public void setIndexTableName(String indexTableName) {
this.indexTableName = indexTableName;
}
-
+
public void setTableName(String tableName) {
this.tableName = tableName;
}
-
+
public int getQueryThreads() {
return queryThreads;
}
-
+
public void setQueryThreads(int queryThreads) {
this.queryThreads = queryThreads;
}
-
+
public String getReadAheadQueueSize() {
return readAheadQueueSize;
}
-
+
public String getReadAheadTimeOut() {
return readAheadTimeOut;
}
-
+
public boolean isUseReadAheadIterator() {
return useReadAheadIterator;
}
-
+
public void setReadAheadQueueSize(String readAheadQueueSize) {
this.readAheadQueueSize = readAheadQueueSize;
}
-
+
public void setReadAheadTimeOut(String readAheadTimeOut) {
this.readAheadTimeOut = readAheadTimeOut;
}
-
+
public void setUseReadAheadIterator(boolean useReadAheadIterator) {
this.useReadAheadIterator = useReadAheadIterator;
}
-
+
public String getReverseIndexTableName() {
return reverseIndexTableName;
}
-
+
public void setReverseIndexTableName(String reverseIndexTableName) {
this.reverseIndexTableName = reverseIndexTableName;
}
-
+
public List<String> getUnevaluatedFields() {
return unevaluatedFields;
}
-
+
public void setUnevaluatedFields(List<String> unevaluatedFields) {
this.unevaluatedFields = unevaluatedFields;
}
-
+
public void setUnevaluatedFields(String unevaluatedFieldList) {
- this.unevaluatedFields = new ArrayList<String>();
- for (String field : unevaluatedFieldList.split(","))
+ this.unevaluatedFields = new ArrayList<>();
+ for (String field : unevaluatedFieldList.split(",")) {
this.unevaluatedFields.add(field);
+ }
}
-
+
public Document createDocument(Key key, Value value) {
Document doc = new Document();
eventFields.clear();
ByteBuffer buf = ByteBuffer.wrap(value.get());
eventFields.readObjectData(kryo, buf);
-
+
// Set the id to the document id which is located in the colf
String row = key.getRow().toString();
String colf = key.getColumnFamily().toString();
@@ -416,48 +419,50 @@ public abstract class AbstractQueryLogic {
doc.getFields().add(val);
}
}
-
+
// Add the pointer for the content.
Field docPointer = new Field();
docPointer.setFieldName("DOCUMENT");
docPointer.setFieldValue("DOCUMENT:" + row + "/" + type + "/" + id);
doc.getFields().add(docPointer);
-
+
return doc;
}
-
+
public String getResultsKey(Entry<Key,Value> key) {
// Use the colf from the table, it contains the uuid and datatype
return key.getKey().getColumnFamily().toString();
}
-
- public Results runQuery(Connector connector, List<String> authorizations, String query, Date beginDate, Date endDate, Set<String> types) {
-
+
+ public Results runQuery(Connector connector, List<String> authorizations, String query,
+ Date beginDate, Date endDate, Set<String> types) {
+
if (StringUtils.isEmpty(query)) {
- throw new IllegalArgumentException("NULL QueryNode reference passed to " + this.getClass().getSimpleName());
+ throw new IllegalArgumentException(
+ "NULL QueryNode reference passed to " + this.getClass().getSimpleName());
}
-
- Set<Range> ranges = new HashSet<Range>();
+
+ Set<Range> ranges = new HashSet<>();
Set<String> typeFilter = types;
String array[] = authorizations.toArray(new String[0]);
Authorizations auths = new Authorizations(array);
Results results = new Results();
-
+
// Get the query string
String queryString = query;
-
+
StopWatch abstractQueryLogic = new StopWatch();
StopWatch optimizedQuery = new StopWatch();
StopWatch queryGlobalIndex = new StopWatch();
StopWatch optimizedEventQuery = new StopWatch();
StopWatch fullScanQuery = new StopWatch();
StopWatch processResults = new StopWatch();
-
+
abstractQueryLogic.start();
-
+
StopWatch parseQuery = new StopWatch();
parseQuery.start();
-
+
QueryParser parser;
try {
if (log.isDebugEnabled()) {
@@ -473,8 +478,8 @@ public abstract class AbstractQueryLogic {
if (log.isDebugEnabled()) {
log.debug(hash + " Query: " + queryString);
}
-
- Set<String> fields = new HashSet<String>();
+
+ Set<String> fields = new HashSet<>();
for (String f : parser.getQueryIdentifiers()) {
fields.add(f);
}
@@ -484,14 +489,14 @@ public abstract class AbstractQueryLogic {
// Remove any negated fields from the fields list, we don't want to lookup negated fields
// in the index.
fields.removeAll(parser.getNegatedTermsForOptimizer());
-
+
if (log.isDebugEnabled()) {
log.debug("getQueryIdentifiers: " + parser.getQueryIdentifiers().toString());
}
// Get the mapping of field name to QueryTerm object from the query. The query term object
// contains the operator, whether its negated or not, and the literal to test against.
Multimap<String,QueryTerm> terms = parser.getQueryTerms();
-
+
// Find out which terms are indexed
// TODO: Should we cache indexed terms or does that not make sense since we are always
// loading data.
@@ -503,10 +508,11 @@ public abstract class AbstractQueryLogic {
} catch (Exception e1) {
throw new RuntimeException("Error in metadata lookup", e1);
}
-
+
// Create a map of indexed term to set of normalizers for it
Multimap<String,Normalizer> indexedTerms = HashMultimap.create();
- for (Entry<String,Multimap<String,Class<? extends Normalizer>>> entry : metadataResults.entrySet()) {
+ for (Entry<String,Multimap<String,Class<? extends Normalizer>>> entry : metadataResults
+ .entrySet()) {
// Get the normalizer from the normalizer cache
for (Class<? extends Normalizer> clazz : entry.getValue().values()) {
indexedTerms.put(entry.getKey(), normalizerCacheMap.get(clazz));
@@ -516,35 +522,41 @@ public abstract class AbstractQueryLogic {
if (log.isDebugEnabled()) {
log.debug(hash + " Indexed Terms: " + indexedTerms.toString());
}
-
+
Set<String> orTerms = parser.getOrTermsForOptimizer();
-
+
// Iterate over the query terms to get the operators specified in the query.
- ArrayList<String> unevaluatedExpressions = new ArrayList<String>();
+ ArrayList<String> unevaluatedExpressions = new ArrayList<>();
boolean unsupportedOperatorSpecified = false;
for (Entry<String,QueryTerm> entry : terms.entries()) {
if (null == entry.getValue()) {
continue;
}
-
- if (null != this.unevaluatedFields && this.unevaluatedFields.contains(entry.getKey().trim())) {
- unevaluatedExpressions.add(entry.getKey().trim() + " " + entry.getValue().getOperator() + " " + entry.getValue().getValue());
+
+ if (null != this.unevaluatedFields
+ && this.unevaluatedFields.contains(entry.getKey().trim())) {
+ unevaluatedExpressions.add(entry.getKey().trim() + " " + entry.getValue().getOperator()
+ + " " + entry.getValue().getValue());
}
-
+
int operator = JexlOperatorConstants.getJJTNodeType(entry.getValue().getOperator());
- if (!(operator == ParserTreeConstants.JJTEQNODE || operator == ParserTreeConstants.JJTNENODE || operator == ParserTreeConstants.JJTLENODE
- || operator == ParserTreeConstants.JJTLTNODE || operator == ParserTreeConstants.JJTGENODE || operator == ParserTreeConstants.JJTGTNODE || operator == ParserTreeConstants.JJTERNODE)) {
+ if (!(operator == ParserTreeConstants.JJTEQNODE || operator == ParserTreeConstants.JJTNENODE
+ || operator == ParserTreeConstants.JJTLENODE || operator == ParserTreeConstants.JJTLTNODE
+ || operator == ParserTreeConstants.JJTGENODE || operator == ParserTreeConstants.JJTGTNODE
+ || operator == ParserTreeConstants.JJTERNODE)) {
unsupportedOperatorSpecified = true;
break;
}
}
- if (null != unevaluatedExpressions)
+ if (null != unevaluatedExpressions) {
unevaluatedExpressions.trimToSize();
+ }
if (log.isDebugEnabled()) {
- log.debug(hash + " unsupportedOperators: " + unsupportedOperatorSpecified + " indexedTerms: " + indexedTerms.toString() + " orTerms: "
- + orTerms.toString() + " unevaluatedExpressions: " + unevaluatedExpressions.toString());
+ log.debug(hash + " unsupportedOperators: " + unsupportedOperatorSpecified + " indexedTerms: "
+ + indexedTerms.toString() + " orTerms: " + orTerms.toString()
+ + " unevaluatedExpressions: " + unevaluatedExpressions.toString());
}
-
+
// We can use the intersecting iterator over the field index as an optimization under the
// following conditions
//
@@ -554,8 +566,10 @@ public abstract class AbstractQueryLogic {
// 1. No unsupported operators in the query.
// 2. and all terms indexed
// or
- // 1. All or'd terms are indexed. NOTE, this will potentially skip some queries and push to a full table scan
- // // WE should look into finding a better way to handle whether we do an optimized query or not.
+ // 1. All or'd terms are indexed. NOTE, this will potentially skip some queries and push to a
+ // full table scan
+ // // WE should look into finding a better way to handle whether we do an optimized query or
+ // not.
boolean optimizationSucceeded = false;
boolean orsAllIndexed = false;
if (orTerms.isEmpty()) {
@@ -563,20 +577,24 @@ public abstract class AbstractQueryLogic {
} else {
orsAllIndexed = indexedTerms.keySet().containsAll(orTerms);
}
-
+
if (log.isDebugEnabled()) {
log.debug("All or terms are indexed");
}
-
+
if (!unsupportedOperatorSpecified
- && (((null == orTerms || orTerms.isEmpty()) && indexedTerms.size() > 0) || (fields.size() > 0 && indexedTerms.size() == fields.size()) || orsAllIndexed)) {
+ && (((null == orTerms || orTerms.isEmpty()) && indexedTerms.size() > 0)
+ || (fields.size() > 0 && indexedTerms.size() == fields.size()) || orsAllIndexed)) {
optimizedQuery.start();
// Set up intersecting iterator over field index.
-
- // Get information from the global index for the indexed terms. The results object will contain the term
- // mapped to an object that contains the total count, and partitions where this term is located.
-
- // TODO: Should we cache indexed term information or does that not make sense since we are always loading data
+
+ // Get information from the global index for the indexed terms. The results object will
+ // contain the term
+ // mapped to an object that contains the total count, and partitions where this term is
+ // located.
+
+ // TODO: Should we cache indexed term information or does that not make sense since we are
+ // always loading data
queryGlobalIndex.start();
IndexRanges termIndexInfo;
try {
@@ -585,7 +603,8 @@ public abstract class AbstractQueryLogic {
if (fields.isEmpty()) {
termIndexInfo = this.getTermIndexInformation(connector, auths, queryString, typeFilter);
if (null != termIndexInfo && termIndexInfo.getRanges().isEmpty()) {
- // Then we didn't find anything in the index for this query. This may happen for an indexed term that has wildcards
+ // Then we didn't find anything in the index for this query. This may happen for an
+ // indexed term that has wildcards
// in unhandled locations.
// Break out of here by throwing a named exception and do full scan
throw new DoNotPerformOptimizedQueryException();
@@ -605,17 +624,20 @@ public abstract class AbstractQueryLogic {
sep = " or ";
}
if (log.isDebugEnabled()) {
- log.debug("Rewrote query for non-fielded single term query: " + queryString + " to " + buf.toString());
+ log.debug("Rewrote query for non-fielded single term query: " + queryString + " to "
+ + buf.toString());
}
queryString = buf.toString();
} else {
throw new RuntimeException("Unexpected IndexRanges implementation");
}
} else {
- RangeCalculator calc = this.getTermIndexInformation(connector, auths, indexedTerms, terms, this.getIndexTableName(), this.getReverseIndexTableName(),
- queryString, this.queryThreads, typeFilter);
+ RangeCalculator calc = this.getTermIndexInformation(connector, auths, indexedTerms, terms,
+ this.getIndexTableName(), this.getReverseIndexTableName(), queryString,
+ this.queryThreads, typeFilter);
if (null == calc.getResult() || calc.getResult().isEmpty()) {
- // Then we didn't find anything in the index for this query. This may happen for an indexed term that has wildcards
+ // Then we didn't find anything in the index for this query. This may happen for an
+ // indexed term that has wildcards
// in unhandled locations.
// Break out of here by throwing a named exception and do full scan
throw new DoNotPerformOptimizedQueryException();
@@ -639,12 +661,13 @@ public abstract class AbstractQueryLogic {
termIndexInfo = null;
}
queryGlobalIndex.stop();
-
+
// Determine if we should proceed with optimized query based on results from the global index
boolean proceed = false;
if (null == termIndexInfo || termIndexInfo.getFieldNamesAndValues().values().size() == 0) {
proceed = false;
- } else if (null != orTerms && orTerms.size() > 0 && (termIndexInfo.getFieldNamesAndValues().values().size() == indexedTerms.size())) {
+ } else if (null != orTerms && orTerms.size() > 0
+ && (termIndexInfo.getFieldNamesAndValues().values().size() == indexedTerms.size())) {
proceed = true;
} else if (termIndexInfo.getFieldNamesAndValues().values().size() > 0) {
proceed = true;
@@ -655,12 +678,14 @@ public abstract class AbstractQueryLogic {
}
if (log.isDebugEnabled()) {
log.debug("Proceed with optimized query: " + proceed);
- if (null != termIndexInfo)
- log.debug("termIndexInfo.getTermsFound().size(): " + termIndexInfo.getFieldNamesAndValues().values().size() + " indexedTerms.size: "
+ if (null != termIndexInfo) {
+ log.debug("termIndexInfo.getTermsFound().size(): "
+ + termIndexInfo.getFieldNamesAndValues().values().size() + " indexedTerms.size: "
+ indexedTerms.size() + " fields.size: " + fields.size());
+ }
}
if (proceed) {
-
+
if (log.isDebugEnabled()) {
log.debug(hash + " Performing optimized query");
}
@@ -669,7 +694,7 @@ public abstract class AbstractQueryLogic {
if (log.isDebugEnabled()) {
log.info(hash + " Ranges: count: " + ranges.size() + ", " + ranges.toString());
}
-
+
// Create BatchScanner, set the ranges, and setup the iterators.
optimizedEventQuery.start();
BatchScanner bs = null;
@@ -677,13 +702,15 @@ public abstract class AbstractQueryLogic {
bs = connector.createBatchScanner(this.getTableName(), auths, queryThreads);
bs.setRanges(ranges);
IteratorSetting si = new IteratorSetting(21, "eval", OptimizedQueryIterator.class);
-
+
if (log.isDebugEnabled()) {
- log.debug("Setting scan option: " + EvaluatingIterator.QUERY_OPTION + " to " + queryString);
+ log.debug(
+ "Setting scan option: " + EvaluatingIterator.QUERY_OPTION + " to " + queryString);
}
// Set the query option
si.addOption(EvaluatingIterator.QUERY_OPTION, queryString);
- // Set the Indexed Terms List option. This is the field name and normalized field value pair separated
+ // Set the Indexed Terms List option. This is the field name and normalized field value
+ // pair separated
// by a comma.
StringBuilder buf = new StringBuilder();
String sep = "";
@@ -699,14 +726,16 @@ public abstract class AbstractQueryLogic {
}
}
if (log.isDebugEnabled()) {
- log.debug("Setting scan option: " + FieldIndexQueryReWriter.INDEXED_TERMS_LIST + " to " + buf.toString());
+ log.debug("Setting scan option: " + FieldIndexQueryReWriter.INDEXED_TERMS_LIST + " to "
+ + buf.toString());
}
FieldIndexQueryReWriter rewriter = new FieldIndexQueryReWriter();
String q = "";
try {
q = queryString;
- q = rewriter.applyCaseSensitivity(q, true, false);// Set upper/lower case for fieldname/fieldvalue
- Map<String,String> opts = new HashMap<String,String>();
+ q = rewriter.applyCaseSensitivity(q, true, false);// Set upper/lower case for
+ // fieldname/fieldvalue
+ Map<String,String> opts = new HashMap<>();
opts.put(FieldIndexQueryReWriter.INDEXED_TERMS_LIST, buf.toString());
q = rewriter.removeNonIndexedTermsAndInvalidRanges(q, opts);
q = rewriter.applyNormalizedTerms(q, opts);
@@ -719,7 +748,7 @@ public abstract class AbstractQueryLogic {
log.error("Problem rewriting query, Exception: " + ex.getMessage());
}
si.addOption(BooleanLogicIterator.FIELD_INDEX_QUERY, q);
-
+
// Set the term cardinality option
sep = "";
buf.delete(0, buf.length());
@@ -730,18 +759,21 @@ public abstract class AbstractQueryLogic {
buf.append(entry.getValue());
sep = ",";
}
- if (log.isDebugEnabled())
- log.debug("Setting scan option: " + BooleanLogicIterator.TERM_CARDINALITIES + " to " + buf.toString());
+ if (log.isDebugEnabled()) {
+ log.debug("Setting scan option: " + BooleanLogicIterator.TERM_CARDINALITIES + " to "
+ + buf.toString());
+ }
si.addOption(BooleanLogicIterator.TERM_CARDINALITIES, buf.toString());
if (this.useReadAheadIterator) {
if (log.isDebugEnabled()) {
- log.debug("Enabling read ahead iterator with queue size: " + this.readAheadQueueSize + " and timeout: " + this.readAheadTimeOut);
+ log.debug("Enabling read ahead iterator with queue size: " + this.readAheadQueueSize
+ + " and timeout: " + this.readAheadTimeOut);
}
si.addOption(ReadAheadIterator.QUEUE_SIZE, this.readAheadQueueSize);
si.addOption(ReadAheadIterator.TIMEOUT, this.readAheadTimeOut);
-
+
}
-
+
if (null != unevaluatedExpressions) {
StringBuilder unevaluatedExpressionList = new StringBuilder();
String sep2 = "";
@@ -749,13 +781,16 @@ public abstract class AbstractQueryLogic {
unevaluatedExpressionList.append(sep2).append(exp);
sep2 = ",";
}
- if (log.isDebugEnabled())
- log.debug("Setting scan option: " + EvaluatingIterator.UNEVALUTED_EXPRESSIONS + " to " + unevaluatedExpressionList.toString());
- si.addOption(EvaluatingIterator.UNEVALUTED_EXPRESSIONS, unevaluatedExpressionList.toString());
+ if (log.isDebugEnabled()) {
+ log.debug("Setting scan option: " + EvaluatingIterator.UNEVALUTED_EXPRESSIONS + " to "
+ + unevaluatedExpressionList.toString());
+ }
+ si.addOption(EvaluatingIterator.UNEVALUTED_EXPRESSIONS,
+ unevaluatedExpressionList.toString());
}
-
+
bs.addScanIterator(si);
-
+
processResults.start();
processResults.suspend();
long count = 0;
@@ -784,16 +819,19 @@ public abstract class AbstractQueryLogic {
}
optimizedQuery.stop();
}
-
+
// WE should look into finding a better way to handle whether we do an optimized query or not.
- // We are not setting up an else condition here because we may have aborted the logic early in the if statement.
- if (!optimizationSucceeded || ((null != orTerms && orTerms.size() > 0) && (indexedTerms.size() != fields.size()) && !orsAllIndexed)) {
- // if (!optimizationSucceeded || ((null != orTerms && orTerms.size() > 0) && (indexedTerms.size() != fields.size()))) {
+ // We are not setting up an else condition here because we may have aborted the logic early in
+ // the if statement.
+ if (!optimizationSucceeded || ((null != orTerms && orTerms.size() > 0)
+ && (indexedTerms.size() != fields.size()) && !orsAllIndexed)) {
+ // if (!optimizationSucceeded || ((null != orTerms && orTerms.size() > 0) &&
+ // (indexedTerms.size() != fields.size()))) {
fullScanQuery.start();
if (log.isDebugEnabled()) {
log.debug(hash + " Performing full scan query");
}
-
+
// Set up a full scan using the date ranges from the query
// Create BatchScanner, set the ranges, and setup the iterators.
BatchScanner bs = null;
@@ -801,11 +839,11 @@ public abstract class AbstractQueryLogic {
// The ranges are the start and end dates
Collection<Range> r = getFullScanRange(beginDate, endDate, terms);
ranges.addAll(r);
-
+
if (log.isDebugEnabled()) {
log.debug(hash + " Ranges: count: " + ranges.size() + ", " + ranges.toString());
}
-
+
bs = connector.createBatchScanner(this.getTableName(), auths, queryThreads);
bs.setRanges(ranges);
IteratorSetting si = new IteratorSetting(22, "eval", EvaluatingIterator.class);
@@ -817,14 +855,16 @@ public abstract class AbstractQueryLogic {
buf.append(s).append(type).append(".*");
s = "|";
}
- if (log.isDebugEnabled())
+ if (log.isDebugEnabled()) {
log.debug("Setting colf regex iterator to: " + buf.toString());
+ }
IteratorSetting ri = new IteratorSetting(21, "typeFilter", RegExFilter.class);
RegExFilter.setRegexs(ri, null, buf.toString(), null, null, false);
bs.addScanIterator(ri);
}
if (log.isDebugEnabled()) {
- log.debug("Setting scan option: " + EvaluatingIterator.QUERY_OPTION + " to " + queryString);
+ log.debug(
+ "Setting scan option: " + EvaluatingIterator.QUERY_OPTION + " to " + queryString);
}
si.addOption(EvaluatingIterator.QUERY_OPTION, queryString);
if (null != unevaluatedExpressions) {
@@ -834,9 +874,12 @@ public abstract class AbstractQueryLogic {
unevaluatedExpressionList.append(sep2).append(exp);
sep2 = ",";
}
- if (log.isDebugEnabled())
- log.debug("Setting scan option: " + EvaluatingIterator.UNEVALUTED_EXPRESSIONS + " to " + unevaluatedExpressionList.toString());
- si.addOption(EvaluatingIterator.UNEVALUTED_EXPRESSIONS, unevaluatedExpressionList.toString());
+ if (log.isDebugEnabled()) {
+ log.debug("Setting scan option: " + EvaluatingIterator.UNEVALUTED_EXPRESSIONS + " to "
+ + unevaluatedExpressionList.toString());
+ }
+ si.addOption(EvaluatingIterator.UNEVALUTED_EXPRESSIONS,
+ unevaluatedExpressionList.toString());
}
bs.addScanIterator(si);
long count = 0;
@@ -863,7 +906,7 @@ public abstract class AbstractQueryLogic {
}
fullScanQuery.stop();
}
-
+
log.info("AbstractQueryLogic: " + queryString + " " + timeString(abstractQueryLogic.getTime()));
log.info(" 1) parse query " + timeString(parseQuery.getTime()));
log.info(" 2) query metadata " + timeString(queryMetadata.getTime()));
@@ -872,12 +915,12 @@ public abstract class AbstractQueryLogic {
log.info(" 1) process results " + timeString(processResults.getTime()));
log.info(" 1) query global index " + timeString(queryGlobalIndex.getTime()));
log.info(hash + " Query completed.");
-
+
return results;
}
-
+
private static String timeString(long millis) {
return String.format("%4.2f", millis / 1000.);
}
-
+
}
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/FieldIndexQueryReWriter.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/FieldIndexQueryReWriter.java
index acfb4f4..7e4fd90 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/FieldIndexQueryReWriter.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/FieldIndexQueryReWriter.java
@@ -49,52 +49,52 @@ import org.apache.hadoop.io.Text;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
-
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
/**
- * The server-side field index queries can only support operations on indexed fields. Additionally, queries that have differing ranges (i.e. one range at the
- * fieldname level and another at the fieldValue level) are not currently supported. This class removes these conflicts from the query as well as sets proper
- * capitalization configurations etc.
- *
- * Once the query has been modified, you can pass it to the BooleanLogicIterator on the server-side via the options map.
- *
+ * The server-side field index queries can only support operations on indexed fields. Additionally,
+ * queries that have differing ranges (i.e. one range at the fieldname level and another at the
+ * fieldValue level) are not currently supported. This class removes these conflicts from the query
+ * as well as sets proper capitalization configurations etc.
+ *
+ * Once the query has been modified, you can pass it to the BooleanLogicIterator on the server-side
+ * via the options map.
+ *
*/
public class FieldIndexQueryReWriter {
-
+
protected static final Logger log = Logger.getLogger(FieldIndexQueryReWriter.class);
- public static final String INDEXED_TERMS_LIST = "INDEXED_TERMS_LIST"; // comma separated list of indexed terms.
+ public static final String INDEXED_TERMS_LIST = "INDEXED_TERMS_LIST"; // comma separated list of
+ // indexed terms.
public static Set<Integer> rangeNodeSet;
-
+
static {
- rangeNodeSet = new HashSet<Integer>();
+ rangeNodeSet = new HashSet<>();
rangeNodeSet.add(ParserTreeConstants.JJTLENODE);
rangeNodeSet.add(ParserTreeConstants.JJTLTNODE);
rangeNodeSet.add(ParserTreeConstants.JJTGENODE);
rangeNodeSet.add(ParserTreeConstants.JJTGTNODE);
rangeNodeSet = Collections.unmodifiableSet(rangeNodeSet);
}
-
+
/*
* Given a JEXL Query, rewrite it and return it.
- *
- * 1. ParseQuery 2. Transform query 3. Refactor query 4. remove non-indexed terms a. remove any tree conflicts b. collapse any branches 7. add normalized
- * values 8. adjust for case sensitivity 9. add prefix.. but jexl chokes on null byte
+ *
+ * 1. ParseQuery 2. Transform query 3. Refactor query 4. remove non-indexed terms a. remove any
+ * tree conflicts b. collapse any branches 7. add normalized values 8. adjust for case sensitivity
+ * 9. add prefix.. but jexl chokes on null byte
*/
public static void setLogLevel(Level lev) {
log.setLevel(lev);
}
-
+
/**
- *
- * @param query
- * @param options
+ *
* @return String representation of a given query.
- * @throws ParseException
- * @throws Exception
*/
- public String removeNonIndexedTermsAndInvalidRanges(String query, Map<String,String> options) throws ParseException, Exception {
+ public String removeNonIndexedTermsAndInvalidRanges(String query, Map<String,String> options)
+ throws ParseException, Exception {
Multimap<String,String> indexedTerms = parseIndexedTerms(options);
RewriterTreeNode node = parseJexlQuery(query);
if (log.isDebugEnabled()) {
@@ -104,22 +104,19 @@ public class FieldIndexQueryReWriter {
node = removeTreeConflicts(node, indexedTerms);
node = collapseBranches(node);
node = removeNegationViolations(node);
-
+
if (log.isDebugEnabled()) {
log.debug("Tree -NonIndexed: " + node.getContents());
}
return rebuildQueryFromTree(node);
}
-
+
/**
- *
- * @param query
- * @param options
+ *
* @return String representation of a given query.
- * @throws ParseException
- * @throws Exception
*/
- public String applyNormalizedTerms(String query, Map<String,String> options) throws ParseException, Exception {
+ public String applyNormalizedTerms(String query, Map<String,String> options)
+ throws ParseException, Exception {
if (log.isDebugEnabled()) {
log.debug("applyNormalizedTerms, query: " + query);
}
@@ -134,16 +131,13 @@ public class FieldIndexQueryReWriter {
}
return rebuildQueryFromTree(node);
}
-
+
/**
- *
- * @param query
- * @param fNameUpper
- * @param fValueUpper
+ *
* @return String representation of a given query.
- * @throws ParseException
*/
- public String applyCaseSensitivity(String query, boolean fNameUpper, boolean fValueUpper) throws ParseException {
+ public String applyCaseSensitivity(String query, boolean fNameUpper, boolean fValueUpper)
+ throws ParseException {
RewriterTreeNode node = parseJexlQuery(query);
if (log.isDebugEnabled()) {
log.debug("Tree: " + node.getContents());
@@ -154,7 +148,7 @@ public class FieldIndexQueryReWriter {
}
return rebuildQueryFromTree(node);
}
-
+
private String rebuildQueryFromTree(RewriterTreeNode node) {
if (node.isLeaf()) {
String fName = node.getFieldName();
@@ -177,7 +171,7 @@ public class FieldIndexQueryReWriter {
}
return fName + operator + "'" + fValue + "'";
} else {
- List<String> parts = new ArrayList<String>();
+ List<String> parts = new ArrayList<>();
Enumeration<?> children = node.children();
while (children.hasMoreElements()) {
RewriterTreeNode child = (RewriterTreeNode) children.nextElement();
@@ -195,7 +189,7 @@ public class FieldIndexQueryReWriter {
return query;
}
}
-
+
/*
* Don't use this, Jexl currently chokes on null bytes in the query
*/
@@ -227,7 +221,7 @@ public class FieldIndexQueryReWriter {
}
return root;
}
-
+
/*
*
*/
@@ -236,11 +230,11 @@ public class FieldIndexQueryReWriter {
if (log.isDebugEnabled()) {
log.debug("transformTreeNode, Equals Node");
}
-
+
Multimap<String,QueryTerm> terms = node.getTerms();
for (String fName : terms.keySet()) {
Collection<QueryTerm> values = terms.get(fName);
-
+
for (QueryTerm t : values) {
if (null == t || null == t.getValue()) {
continue;
@@ -248,17 +242,18 @@ public class FieldIndexQueryReWriter {
String fValue = t.getValue().toString();
fValue = fValue.replaceAll("'", "");
boolean negated = t.getOperator().equals("!=");
- RewriterTreeNode child = new RewriterTreeNode(ParserTreeConstants.JJTEQNODE, fName, fValue, negated);
+ RewriterTreeNode child =
+ new RewriterTreeNode(ParserTreeConstants.JJTEQNODE, fName, fValue, negated);
return child;
}
}
}
-
+
if (node.getType().equals(ASTERNode.class) || node.getType().equals(ASTNRNode.class)) {
if (log.isDebugEnabled()) {
log.debug("transformTreeNode, Regex Node");
}
-
+
Multimap<String,QueryTerm> terms = node.getTerms();
for (String fName : terms.keySet()) {
Collection<QueryTerm> values = terms.get(fName);
@@ -269,14 +264,15 @@ public class FieldIndexQueryReWriter {
String fValue = t.getValue().toString();
fValue = fValue.replaceAll("'", "");
boolean negated = node.getType().equals(ASTNRNode.class);
- RewriterTreeNode child = new RewriterTreeNode(ParserTreeConstants.JJTERNODE, fName, fValue, negated);
+ RewriterTreeNode child =
+ new RewriterTreeNode(ParserTreeConstants.JJTERNODE, fName, fValue, negated);
return child;
}
}
}
-
- if (node.getType().equals(ASTLTNode.class) || node.getType().equals(ASTLENode.class) || node.getType().equals(ASTGTNode.class)
- || node.getType().equals(ASTGENode.class)) {
+
+ if (node.getType().equals(ASTLTNode.class) || node.getType().equals(ASTLENode.class)
+ || node.getType().equals(ASTGTNode.class) || node.getType().equals(ASTGENode.class)) {
if (log.isDebugEnabled()) {
log.debug("transformTreeNode, LT/LE/GT/GE node");
}
@@ -289,18 +285,20 @@ public class FieldIndexQueryReWriter {
}
String fValue = t.getValue().toString();
fValue = fValue.replaceAll("'", "").toLowerCase();
- boolean negated = false; // to be negated, must be child of Not, which is handled elsewhere.
+ boolean negated = false; // to be negated, must be child of Not, which is handled
+ // elsewhere.
int mytype = JexlOperatorConstants.getJJTNodeType(t.getOperator());
RewriterTreeNode child = new RewriterTreeNode(mytype, fName, fValue, negated);
return child;
}
}
}
-
+
RewriterTreeNode returnNode = null;
-
+
if (node.getType().equals(ASTAndNode.class) || node.getType().equals(ASTOrNode.class)) {
- int parentType = node.getType().equals(ASTAndNode.class) ? ParserTreeConstants.JJTANDNODE : ParserTreeConstants.JJTORNODE;
+ int parentType = node.getType().equals(ASTAndNode.class) ? ParserTreeConstants.JJTANDNODE
+ : ParserTreeConstants.JJTORNODE;
if (log.isDebugEnabled()) {
log.debug("transformTreeNode, AND/OR node: " + parentType);
}
@@ -350,8 +348,9 @@ public class FieldIndexQueryReWriter {
} else {
returnNode = new RewriterTreeNode(ParserTreeConstants.JJTNOTNODE);
}
- } else if (node.getType().equals(ASTJexlScript.class) || node.getType().getSimpleName().equals("RootNode")) {
-
+ } else if (node.getType().equals(ASTJexlScript.class)
+ || node.getType().getSimpleName().equals("RootNode")) {
+
if (log.isDebugEnabled()) {
log.debug("transformTreeNode, ROOT/JexlScript node");
}
@@ -378,43 +377,48 @@ public class FieldIndexQueryReWriter {
returnNode = new RewriterTreeNode(ParserTreeConstants.JJTJEXLSCRIPT);
}
} else {
- log.error("transformTreeNode, Currently Unsupported Node type: " + node.getClass().getName() + " \t" + node.getType());
+ log.error("transformTreeNode, Currently Unsupported Node type: " + node.getClass().getName()
+ + " \t" + node.getType());
}
for (TreeNode child : node.getChildren()) {
returnNode.add(transformTreeNode(child));
}
-
+
return returnNode;
}
-
- private RewriterTreeNode removeNonIndexedTerms(RewriterTreeNode root, Multimap<String,String> indexedTerms) throws Exception {
- // public void removeNonIndexedTerms(BooleanLogicTreeNodeJexl myroot, String indexedTerms) throws Exception {
+
+ private RewriterTreeNode removeNonIndexedTerms(RewriterTreeNode root,
+ Multimap<String,String> indexedTerms) throws Exception {
+ // public void removeNonIndexedTerms(BooleanLogicTreeNodeJexl myroot, String indexedTerms)
+ // throws Exception {
if (indexedTerms.isEmpty()) {
throw new Exception("removeNonIndexedTerms, indexed Terms empty");
}
-
+
// NOTE: doing a depth first enumeration didn't work when I started
// removing nodes halfway through. The following method does work,
// it's essentially a reverse breadth first traversal.
- List<RewriterTreeNode> nodes = new ArrayList<RewriterTreeNode>();
+ List<RewriterTreeNode> nodes = new ArrayList<>();
Enumeration<?> bfe = root.breadthFirstEnumeration();
-
+
while (bfe.hasMoreElements()) {
RewriterTreeNode node = (RewriterTreeNode) bfe.nextElement();
nodes.add(node);
}
-
+
// walk backwards
for (int i = nodes.size() - 1; i >= 0; i--) {
RewriterTreeNode node = nodes.get(i);
if (log.isDebugEnabled()) {
- log.debug("removeNonIndexedTerms, analyzing node: " + node.toString() + " " + node.printNode());
+ log.debug(
+ "removeNonIndexedTerms, analyzing node: " + node.toString() + " " + node.printNode());
}
- if (node.getType() == ParserTreeConstants.JJTANDNODE || node.getType() == ParserTreeConstants.JJTORNODE) {
+ if (node.getType() == ParserTreeConstants.JJTANDNODE
+ || node.getType() == ParserTreeConstants.JJTORNODE) {
// If all of your children are gone, AND/OR has no purpose, remove
if (node.getChildCount() == 0) {
node.removeFromParent();
-
+
// If AND/OR has only 1 child, attach it to the parent directly.
} else if (node.getChildCount() == 1) {
RewriterTreeNode p = (RewriterTreeNode) node.getParent();
@@ -432,10 +436,12 @@ public class FieldIndexQueryReWriter {
continue;
} else {
if (log.isDebugEnabled()) {
- log.debug("removeNonIndexedTerms, Testing: " + node.getFieldName() + ":" + node.getFieldValue());
+ log.debug("removeNonIndexedTerms, Testing: " + node.getFieldName() + ":"
+ + node.getFieldValue());
}
-
- if (!indexedTerms.containsKey(node.getFieldName().toString() + ":" + node.getFieldValue().toString())) {
+
+ if (!indexedTerms
+ .containsKey(node.getFieldName().toString() + ":" + node.getFieldValue().toString())) {
if (log.isDebugEnabled()) {
log.debug(node.getFieldName() + ":" + node.getFieldValue() + " is NOT indexed");
}
@@ -447,11 +453,12 @@ public class FieldIndexQueryReWriter {
}
}
}
-
+
return root;
}
-
- private RewriterTreeNode orNormalizedTerms(RewriterTreeNode myroot, Multimap<String,String> indexedTerms) throws Exception {
+
+ private RewriterTreeNode orNormalizedTerms(RewriterTreeNode myroot,
+ Multimap<String,String> indexedTerms) throws Exception {
// we have multimap of FieldName to multiple FieldValues
if (indexedTerms.isEmpty()) {
throw new Exception("indexed Terms empty");
@@ -460,21 +467,23 @@ public class FieldIndexQueryReWriter {
// NOTE: doing a depth first enumeration didn't work when I started
// removing nodes halfway through. The following method does work,
// it's essentially a reverse breadth first traversal.
- List<RewriterTreeNode> nodes = new ArrayList<RewriterTreeNode>();
+ List<RewriterTreeNode> nodes = new ArrayList<>();
Enumeration<?> bfe = myroot.breadthFirstEnumeration();
-
+
while (bfe.hasMoreElements()) {
RewriterTreeNode node = (RewriterTreeNode) bfe.nextElement();
nodes.add(node);
}
-
+
// walk backwards
for (int i = nodes.size() - 1; i >= 0; i--) {
RewriterTreeNode node = nodes.get(i);
if (log.isDebugEnabled()) {
- log.debug("orNormalizedTerms, analyzing node: " + node.toString() + " " + node.printNode());
+ log.debug(
+ "orNormalizedTerms, analyzing node: " + node.toString() + " " + node.printNode());
}
- if (node.getType() == ParserTreeConstants.JJTANDNODE || node.getType() == ParserTreeConstants.JJTORNODE) {
+ if (node.getType() == ParserTreeConstants.JJTANDNODE
+ || node.getType() == ParserTreeConstants.JJTORNODE) {
continue;
} else if (node.getType() == ParserTreeConstants.JJTJEXLSCRIPT) {
if (node.getChildCount() == 0) {
@@ -490,7 +499,7 @@ public class FieldIndexQueryReWriter {
String fName = node.getFieldName().toString();
String fValue = node.getFieldValue().toString();
if (indexedTerms.containsKey(fName + ":" + fValue)) {
-
+
if (indexedTerms.get(fName + ":" + fValue).size() > 1) {
// Replace node with an OR, and make children from the multimap collection
node.setType(ParserTreeConstants.JJTORNODE);
@@ -500,7 +509,8 @@ public class FieldIndexQueryReWriter {
node.setFieldValue(null);
Collection<String> values = indexedTerms.get(fName + ":" + fValue);
for (String value : values) {
- RewriterTreeNode n = new RewriterTreeNode(ParserTreeConstants.JJTEQNODE, fName, value, neg);
+ RewriterTreeNode n =
+ new RewriterTreeNode(ParserTreeConstants.JJTEQNODE, fName, value, neg);
node.add(n);
}
} else if (indexedTerms.get(fName + ":" + fValue).size() == 1) {
@@ -511,9 +521,10 @@ public class FieldIndexQueryReWriter {
node.setFieldValue(val);
}
}
-
+
} else {
- // throw new Exception("orNormalizedTerms, encountered a non-indexed term: " + node.getFieldName().toString());
+ // throw new Exception("orNormalizedTerms, encountered a non-indexed term: " +
+ // node.getFieldName().toString());
}
}
}
@@ -521,63 +532,68 @@ public class FieldIndexQueryReWriter {
log.debug("Caught exception in orNormalizedTerms(): " + e);
throw new Exception("exception in: orNormalizedTerms");
}
-
+
return myroot;
}
-
+
/***
- * We only want to pass ranges on if they meet a very narrow set of conditions. All ranges must be bounded i.e. x between(1,5) so their parent is an AND. We
- * will only pass a range if 1. The AND is the direct child of HEAD node 2. The AND is a child of an OR which is a direct child of HEAD node.
- *
- * If there is an HEAD-AND[x,OR[b,AND[range]]], and you remove the range, this turns the tree into HEAD-AND[X,OR[B]] which becomes HEAD-AND[X,B] which will
- * miss entries, so you need to cut out the entire OR at this point and let the positive side of the AND pick it up.
+ * We only want to pass ranges on if they meet a very narrow set of conditions. All ranges must be
+ * bounded i.e. x between(1,5) so their parent is an AND. We will only pass a range if 1. The AND
+ * is the direct child of HEAD node 2. The AND is a child of an OR which is a direct child of HEAD
+ * node.
+ *
+ * If there is a HEAD-AND[x,OR[b,AND[range]]], and you remove the range, this turns the tree into
+ * HEAD-AND[X,OR[B]] which becomes HEAD-AND[X,B] which will miss entries, so you need to cut out
+ * the entire OR at this point and let the positive side of the AND pick it up.
*/
- private RewriterTreeNode removeTreeConflicts(RewriterTreeNode root, Multimap<String,String> indexedTerms) {
+ private RewriterTreeNode removeTreeConflicts(RewriterTreeNode root,
+ Multimap<String,String> indexedTerms) {
if (log.isDebugEnabled()) {
log.debug("removeTreeConflicts");
}
-
+
/*
- * You can't modify the enumeration, so save it into a list. We want to walk backwards in a breadthFirstEnumeration. So we don't throw null pointers when we
- * erase nodes and shorten our list.
+ * You can't modify the enumeration, so save it into a list. We want to walk backwards in a
+ * breadthFirstEnumeration. So we don't throw null pointers when we erase nodes and shorten our
+ * list.
*/
- List<RewriterTreeNode> nodeList = new ArrayList<RewriterTreeNode>();
+ List<RewriterTreeNode> nodeList = new ArrayList<>();
Enumeration<?> nodes = root.breadthFirstEnumeration();
while (nodes.hasMoreElements()) {
RewriterTreeNode child = (RewriterTreeNode) nodes.nextElement();
nodeList.add(child);
}
-
+
// walk backwards
for (int i = nodeList.size() - 1; i >= 0; i--) {
RewriterTreeNode node = nodeList.get(i);
-
+
if (node.isRemoval()) {
node.removeFromParent();
continue;
}
-
+
RewriterTreeNode parent = (RewriterTreeNode) node.getParent();
/*
- * All ranges must be bounded! This means the range must be part of an AND, and the parent of AND must be a HEAD node or an OR whose parent is a HEAD
- * node.
+ * All ranges must be bounded! This means the range must be part of an AND, and the parent of
+ * AND must be a HEAD node or an OR whose parent is a HEAD node.
*/
- if (node.getType() == ParserTreeConstants.JJTANDNODE
- && (node.getLevel() == 1 || (parent.getType() == ParserTreeConstants.JJTORNODE && parent.getLevel() == 1))) {
-
+ if (node.getType() == ParserTreeConstants.JJTANDNODE && (node.getLevel() == 1
+ || (parent.getType() == ParserTreeConstants.JJTORNODE && parent.getLevel() == 1))) {
+
if (log.isDebugEnabled()) {
log.debug("AND at level 1 or with OR parent at level 1");
}
Map<Text,RangeBounds> rangeMap = getBoundedRangeMap(node);
-
+
// can't modify the enumeration... save children to a list.
- List<RewriterTreeNode> childList = new ArrayList<RewriterTreeNode>();
+ List<RewriterTreeNode> childList = new ArrayList<>();
Enumeration<?> children = node.children();
while (children.hasMoreElements()) {
RewriterTreeNode child = (RewriterTreeNode) children.nextElement();
childList.add(child);
}
-
+
for (int j = childList.size() - 1; j >= 0; j--) {
RewriterTreeNode child = childList.get(j);
// currently we are not allowing unbounded ranges, so they must sit under an AND node.
@@ -608,49 +624,52 @@ public class FieldIndexQueryReWriter {
if (singleSib) {
child.removeFromParent();
} else {
- if (indexedTerms.containsKey(child.getFieldName() + ":" + child.getFieldValue())) {
+ if (indexedTerms
+ .containsKey(child.getFieldName() + ":" + child.getFieldValue())) {
if (log.isDebugEnabled()) {
log.debug("removeTreeConflicts, node: " + node.getContents());
}
// swap parent AND with an OR
node.removeAllChildren();
node.setType(ParserTreeConstants.JJTORNODE);
-
- Collection<String> values = indexedTerms.get(child.getFieldName() + ":" + child.getFieldValue());
+
+ Collection<String> values =
+ indexedTerms.get(child.getFieldName() + ":" + child.getFieldValue());
for (String value : values) {
- RewriterTreeNode n = new RewriterTreeNode(ParserTreeConstants.JJTEQNODE, child.getFieldName(), value, child.isNegated());
+ RewriterTreeNode n = new RewriterTreeNode(ParserTreeConstants.JJTEQNODE,
+ child.getFieldName(), value, child.isNegated());
node.add(n);
}
if (log.isDebugEnabled()) {
log.debug("removeTreeConflicts, node: " + node.getContents());
}
-
+
break;
} else {
child.removeFromParent();
}
-
+
}
}
}
}
- }// end inner for
-
+ } // end inner for
+
} else { // remove all ranges!
if (node.isLeaf()) {
continue;
}
// can't modify the enumeration...
- List<RewriterTreeNode> childList = new ArrayList<RewriterTreeNode>();
+ List<RewriterTreeNode> childList = new ArrayList<>();
Enumeration<?> children = node.children();
while (children.hasMoreElements()) {
RewriterTreeNode child = (RewriterTreeNode) children.nextElement();
childList.add(child);
}
-
+
// walk backwards
for (int j = childList.size() - 1; j >= 0; j--) {
-
+
RewriterTreeNode child = childList.get(j);
if (log.isDebugEnabled()) {
log.debug("removeTreeConflicts, looking at node: " + node);
@@ -658,24 +677,25 @@ public class FieldIndexQueryReWriter {
if (rangeNodeSet.contains(child.getType())) {
// if grand parent is an OR and not top level, mark whole thing for removal.
RewriterTreeNode grandParent = (RewriterTreeNode) child.getParent().getParent();
- if (grandParent.getType() == ParserTreeConstants.JJTORNODE && grandParent.getLevel() != 1) {
+ if (grandParent.getType() == ParserTreeConstants.JJTORNODE
+ && grandParent.getLevel() != 1) {
grandParent.setRemoval(true);
}
child.removeFromParent();
}
}
}
-
- }// end outer for
-
+
+ } // end outer for
+
return root;
}
-
+
private RewriterTreeNode removeNegationViolations(RewriterTreeNode node) throws Exception {
// Double check the top level node for negation violations
// if AND, one child must be positive, if OR, no negatives allowed.
RewriterTreeNode one = (RewriterTreeNode) node.getFirstChild(); // Head node has only 1 child.
- ArrayList<RewriterTreeNode> childrenList = new ArrayList<RewriterTreeNode>();
+ ArrayList<RewriterTreeNode> childrenList = new ArrayList<>();
Enumeration<?> children = one.children();
while (children.hasMoreElements()) {
RewriterTreeNode child = (RewriterTreeNode) children.nextElement();
@@ -702,33 +722,35 @@ public class FieldIndexQueryReWriter {
throw new Exception("FieldIndexQueryReWriter: Top level query node cannot be processed.");
}
}
-
+
return node;
}
-
+
// After tree conflicts have been resolve, we can collapse branches where
// leaves have been pruned.
private RewriterTreeNode collapseBranches(RewriterTreeNode myroot) throws Exception {
-
+
// NOTE: doing a depth first enumeration didn't work when I started
// removing nodes halfway through. The following method does work,
// it's essentially a reverse breadth first traversal.
- List<RewriterTreeNode> nodes = new ArrayList<RewriterTreeNode>();
+ List<RewriterTreeNode> nodes = new ArrayList<>();
Enumeration<?> bfe = myroot.breadthFirstEnumeration();
-
+
while (bfe.hasMoreElements()) {
RewriterTreeNode node = (RewriterTreeNode) bfe.nextElement();
nodes.add(node);
}
-
+
// walk backwards
for (int i = nodes.size() - 1; i >= 0; i--) {
RewriterTreeNode node = nodes.get(i);
if (log.isDebugEnabled()) {
- log.debug("collapseBranches, inspecting node: " + node.toString() + " " + node.printNode());
+ log.debug(
+ "collapseBranches, inspecting node: " + node.toString() + " " + node.printNode());
}
-
- if (node.getType() == ParserTreeConstants.JJTANDNODE || node.getType() == ParserTreeConstants.JJTORNODE) {
+
+ if (node.getType() == ParserTreeConstants.JJTANDNODE
+ || node.getType() == ParserTreeConstants.JJTORNODE) {
if (node.getChildCount() == 0) {
node.removeFromParent();
} else if (node.getChildCount() == 1) {
@@ -736,7 +758,7 @@ public class FieldIndexQueryReWriter {
RewriterTreeNode c = (RewriterTreeNode) node.getFirstChild();
node.removeFromParent();
p.add(c);
-
+
}
} else if (node.getType() == ParserTreeConstants.JJTJEXLSCRIPT) {
if (node.getChildCount() == 0) {
@@ -746,10 +768,7 @@ public class FieldIndexQueryReWriter {
}
return myroot;
}
-
- /**
- * @param options
- */
+
public Multimap<String,String> parseIndexedTerms(Map<String,String> options) {
if (options.get(INDEXED_TERMS_LIST) != null) {
Multimap<String,String> mmap = HashMultimap.create();
@@ -764,9 +783,9 @@ public class FieldIndexQueryReWriter {
for (int i = 2; i < parts.length; i++) {
// key is original query token, i.e. color:red
mmap.put(parts[0] + ":" + parts[1], parts[i]);
-
+
}
-
+
}
if (log.isDebugEnabled()) {
log.debug("multimap: " + mmap);
@@ -778,16 +797,13 @@ public class FieldIndexQueryReWriter {
}
return null;
}
-
- /**
- * @param root
- */
+
public RewriterTreeNode refactorTree(RewriterTreeNode root) {
Enumeration<?> dfe = root.breadthFirstEnumeration();
-
+
while (dfe.hasMoreElements()) {
RewriterTreeNode n = (RewriterTreeNode) dfe.nextElement();
-
+
if (n.getType() == ParserTreeConstants.JJTNOTNODE) {// BooleanLogicTreeNode.NodeType.NOT) {
RewriterTreeNode child = (RewriterTreeNode) n.getChildAt(0);
child.setNegated(true);
@@ -796,46 +812,48 @@ public class FieldIndexQueryReWriter {
parent.add(child);
}
}
-
+
// cycle through again and distribute nots
Enumeration<?> bfe = root.breadthFirstEnumeration();
RewriterTreeNode child;
-
+
while (bfe.hasMoreElements()) {
child = (RewriterTreeNode) bfe.nextElement();
-
+
if (child.isNegated()) {
if (child.getChildCount() > 0) {
demorganSubTree(child);
-
+
}
}
}
return root;
-
+
}
-
+
private void demorganSubTree(RewriterTreeNode root) {
-
+
root.setNegated(false);
// root.setChildrenAllNegated(true);
-
+
if (root.getType() == ParserTreeConstants.JJTANDNODE) {// BooleanLogicTreeNode.NodeType.AND) {
// root.setType(BooleanLogicTreeNode.NodeType.OR);
root.setType(ParserTreeConstants.JJTORNODE);
- } else if (root.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) {
+ } else if (root.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR)
+ // {
// root.setType(BooleanLogicTreeNode.NodeType.AND);
root.setType(ParserTreeConstants.JJTANDNODE);
- } else if (root.getType() == ParserTreeConstants.JJTEQNODE || root.getType() == ParserTreeConstants.JJTERNODE) {
+ } else if (root.getType() == ParserTreeConstants.JJTEQNODE
+ || root.getType() == ParserTreeConstants.JJTERNODE) {
// do nothing
} else {
log.error("refactorSubTree, node type not supported");
}
-
+
Enumeration<?> children = root.children();
RewriterTreeNode child = null;
// now distribute the negative
-
+
while (children.hasMoreElements()) {
child = (RewriterTreeNode) children.nextElement();
if (child.isNegated()) {
@@ -845,32 +863,37 @@ public class FieldIndexQueryReWriter {
}
}
}
-
- private RewriterTreeNode applyCaseSensitivity(RewriterTreeNode root, boolean fnUpper, boolean fvUpper) {
+
+ private RewriterTreeNode applyCaseSensitivity(RewriterTreeNode root, boolean fnUpper,
+ boolean fvUpper) {
// for each leaf, apply case sensitivity
Enumeration<?> bfe = root.breadthFirstEnumeration();
while (bfe.hasMoreElements()) {
RewriterTreeNode node = (RewriterTreeNode) bfe.nextElement();
if (node.isLeaf()) {
- String fName = fnUpper ? node.getFieldName().toUpperCase() : node.getFieldName().toLowerCase();
+ String fName =
+ fnUpper ? node.getFieldName().toUpperCase() : node.getFieldName().toLowerCase();
node.setFieldName(fName);
-
- String fValue = fvUpper ? node.getFieldValue().toUpperCase() : node.getFieldValue().toLowerCase();
+
+ String fValue =
+ fvUpper ? node.getFieldValue().toUpperCase() : node.getFieldValue().toLowerCase();
node.setFieldValue(fValue);
-
+
}
}
return root;
}
-
+
private Map<Text,RangeBounds> getBoundedRangeMap(RewriterTreeNode node) {
-
- if (node.getType() == ParserTreeConstants.JJTANDNODE || node.getType() == ParserTreeConstants.JJTORNODE) {
+
+ if (node.getType() == ParserTreeConstants.JJTANDNODE
+ || node.getType() == ParserTreeConstants.JJTORNODE) {
Enumeration<?> children = node.children();
- Map<Text,RangeBounds> rangeMap = new HashMap<Text,RangeBounds>();
+ Map<Text,RangeBounds> rangeMap = new HashMap<>();
while (children.hasMoreElements()) {
RewriterTreeNode child = (RewriterTreeNode) children.nextElement();
- if (child.getType() == ParserTreeConstants.JJTLENODE || child.getType() == ParserTreeConstants.JJTLTNODE) {
+ if (child.getType() == ParserTreeConstants.JJTLENODE
+ || child.getType() == ParserTreeConstants.JJTLTNODE) {
Text fName = new Text(child.getFieldName());
if (rangeMap.containsKey(fName)) {
RangeBounds rb = rangeMap.get(fName);
@@ -883,8 +906,9 @@ public class FieldIndexQueryReWriter {
rb.setLower(new Text(child.getFieldValue()));
rangeMap.put(new Text(child.getFieldName()), rb);
}
-
- } else if (child.getType() == ParserTreeConstants.JJTGENODE || child.getType() == ParserTreeConstants.JJTGTNODE) {
+
+ } else if (child.getType() == ParserTreeConstants.JJTGENODE
+ || child.getType() == ParserTreeConstants.JJTGTNODE) {
Text fName = new Text(child.getFieldName());
if (rangeMap.containsKey(fName)) {
RangeBounds rb = rangeMap.get(fName);
@@ -899,13 +923,14 @@ public class FieldIndexQueryReWriter {
}
}
}
-
+
for (Entry<Text,RangeBounds> entry : rangeMap.entrySet()) {
RangeBounds rb = entry.getValue();
if (rb.getLower() == null || rb.getUpper() == null) {
// unbounded range, remove
if (log.isDebugEnabled()) {
- log.debug("testBoundedRangeExistence: Unbounded Range detected, removing entry from rangeMap");
+ log.debug(
+ "testBoundedRangeExistence: Unbounded Range detected, removing entry from rangeMap");
}
rangeMap.remove(entry.getKey());
}
@@ -914,15 +939,15 @@ public class FieldIndexQueryReWriter {
return rangeMap;
}
}
-
+
return null;
}
-
+
/**
* INNER CLASSES
*/
public class RewriterTreeNode extends DefaultMutableTreeNode {
-
+
private static final long serialVersionUID = 1L;
private boolean negated = false;
private String fieldName;
@@ -930,43 +955,26 @@ public class FieldIndexQueryReWriter {
private String operator;
private int type;
private boolean removal = false;
-
- /**
- *
- * @param type
- */
+
public RewriterTreeNode(int type) {
super();
this.type = type;
}
-
- /**
- *
- * @param type
- * @param fName
- * @param fValue
- */
+
public RewriterTreeNode(int type, String fName, String fValue) {
super();
init(type, fName, fValue);
}
-
- /**
- *
- * @param type
- * @param fName
- * @param fValue
- * @param negate
- */
+
public RewriterTreeNode(int type, String fName, String fValue, boolean negate) {
super();
init(type, fName, fValue, negate);
}
-
+
private void init(int type, String fName, String fValue) {
init(type, fName, fValue, false);
}
-
+
private void init(int type, String fName, String fValue, boolean negate) {
this.type = type;
this.fieldName = fName;
@@ -977,98 +985,78 @@ public class FieldIndexQueryReWriter {
log.debug("FN: " + this.fieldName + " FV: " + this.fieldValue + " Op: " + this.operator);
}
}
-
+
/**
* @return The field name.
*/
public String getFieldName() {
return fieldName;
}
-
- /**
- *
- * @param fieldName
- */
+
public void setFieldName(String fieldName) {
this.fieldName = fieldName;
}
-
+
/**
- *
+ *
* @return The field value.
*/
public String getFieldValue() {
return fieldValue;
}
-
- /**
- *
- * @param fieldValue
- */
+
public void setFieldValue(String fieldValue) {
this.fieldValue = fieldValue;
}
-
+
/**
- *
+ *
* @return true if negated, otherwise false.
*/
public boolean isNegated() {
return negated;
}
-
- /**
- *
- * @param negated
- */
+
public void setNegated(boolean negated) {
this.negated = negated;
}
-
+
/**
- *
+ *
* @return The operator.
*/
public String getOperator() {
return operator;
}
-
- /**
- *
- * @param operator
- */
+
public void setOperator(String operator) {
this.operator = operator;
}
-
+
/**
- *
+ *
* @return The type.
*/
public int getType() {
return type;
}
-
- /**
- *
- * @param type
- */
+
public void setType(int type) {
this.type = type;
}
-
+
public boolean isRemoval() {
return removal;
}
-
+
public void setRemoval(boolean removal) {
this.removal = removal;
}
-
+
public String getContents() {
StringBuilder s = new StringBuilder("[");
s.append(toString());
-
+
if (children != null) {
Enumeration<?> e = this.children();
while (e.hasMoreElements()) {
@@ -1080,9 +1068,9 @@ public class FieldIndexQueryReWriter {
s.append("]");
return s.toString();
}
-
+
/**
- *
+ *
* @return A string representation of the field name and value.
*/
public String printNode() {
@@ -1102,7 +1090,7 @@ public class FieldIndexQueryReWriter {
s.append("]");
return s.toString();
}
-
+
@Override
public String toString() {
switch (type) {
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/QueryEvaluator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/QueryEvaluator.java
index aaac3d8..47d6f20 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/QueryEvaluator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/QueryEvaluator.java
@@ -23,7 +23,6 @@ import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
-
import org.apache.accumulo.examples.wikisearch.function.QueryFunctions;
import org.apache.accumulo.examples.wikisearch.jexl.Arithmetic;
import org.apache.accumulo.examples.wikisearch.parser.EventFields.FieldValue;
@@ -38,24 +37,23 @@ import org.apache.commons.jexl2.parser.ParserTreeConstants;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
-
import com.google.common.collect.Multimap;
-
/**
- * This class evaluates events against a query. The query is passed to the constructor and then parsed. It is evaluated against an event in the evaluate method.
+ * This class evaluates events against a query. The query is passed to the constructor and then
+ * parsed. It is evaluated against an event in the evaluate method.
*/
public class QueryEvaluator {
-
+
private static Logger log = Logger.getLogger(QueryEvaluator.class);
// According to the JEXL 2.0 docs, the engine is thread-safe. Let's create 1 engine per VM and
// cache 128 expressions
private static JexlEngine engine = new JexlEngine(null, new Arithmetic(false), null, null);
-
+
static {
engine.setSilent(false);
engine.setCache(128);
- Map<String,Object> functions = new HashMap<String,Object>();
+ Map<String,Object> functions = new HashMap<>();
functions.put("f", QueryFunctions.class);
engine.setFunctions(functions);
}
@@ -65,7 +63,7 @@ public class QueryEvaluator {
private String modifiedQuery = null;
private JexlContext ctx = new MapContext();
private boolean caseInsensitive = true;
-
+
public QueryEvaluator(String query) throws ParseException {
this.caseInsensitive = true; // default case insensitive matching.
if (caseInsensitive) {
@@ -76,7 +74,7 @@ public class QueryEvaluator {
parser.execute(query);
this.terms = parser.getQueryTerms();
if (caseInsensitive) {
- literals = new HashSet<String>();
+ literals = new HashSet<>();
for (String lit : parser.getQueryIdentifiers()) {
literals.add(lit.toLowerCase());
}
@@ -84,7 +82,7 @@ public class QueryEvaluator {
this.literals = parser.getQueryIdentifiers();
}
}
-
+
public QueryEvaluator(String query, boolean insensitive) throws ParseException {
this.caseInsensitive = insensitive;
if (this.caseInsensitive) {
@@ -94,9 +92,9 @@ public class QueryEvaluator {
QueryParser parser = new QueryParser();
parser.execute(query);
this.terms = parser.getQueryTerms();
-
+
if (caseInsensitive) {
- literals = new HashSet<String>();
+ literals = new HashSet<>();
for (String lit : parser.getQueryIdentifiers()) {
literals.add(lit.toLowerCase());
}
@@ -104,22 +102,23 @@ public class QueryEvaluator {
this.literals = parser.getQueryIdentifiers();
}
}
-
+
public String getQuery() {
return this.query;
}
-
+
public void printLiterals() {
for (String s : literals) {
System.out.println("literal: " + s);
}
}
-
+
public void setLevel(Level lev) {
log.setLevel(lev);
}
-
- public StringBuilder rewriteQuery(StringBuilder query, String fieldName, Collection<FieldValue> fieldValues) {
+
+ public StringBuilder rewriteQuery(StringBuilder query, String fieldName,
+ Collection<FieldValue> fieldValues) {
if (log.isDebugEnabled()) {
log.debug("rewriteQuery");
}
@@ -145,37 +144,42 @@ public class QueryEvaluator {
}
// Add the array to the context
ctx.set(fieldName, values);
-
+
Collection<QueryTerm> qt = terms.get(fieldName);
-
+
// Add a script to the beginning of the query for this multi-valued field
StringBuilder script = new StringBuilder();
script.append("_").append(fieldName).append(" = false;\n");
script.append("for (field : ").append(fieldName).append(") {\n");
-
+
for (QueryTerm t : qt) {
- if (!t.getOperator().equals(JexlOperatorConstants.getOperator(ParserTreeConstants.JJTFUNCTIONNODE))) {
- script.append("\tif (_").append(fieldName).append(" == false && field ").append(t.getOperator()).append(" ").append(t.getValue()).append(") { \n");
+ if (!t.getOperator()
+ .equals(JexlOperatorConstants.getOperator(ParserTreeConstants.JJTFUNCTIONNODE))) {
+ script.append("\tif (_").append(fieldName).append(" == false && field ")
+ .append(t.getOperator()).append(" ").append(t.getValue()).append(") { \n");
} else {
- script.append("\tif (_").append(fieldName).append(" == false && ").append(t.getValue().toString().replace(fieldName, "field")).append(") { \n");
+ script.append("\tif (_").append(fieldName).append(" == false && ")
+ .append(t.getValue().toString().replace(fieldName, "field")).append(") { \n");
}
script.append("\t\t_").append(fieldName).append(" = true;\n");
script.append("\t}\n");
}
script.append("}\n");
-
+
// Add the script to the beginning of the query
query.insert(0, script.toString());
-
+
StringBuilder newPredicate = new StringBuilder();
newPredicate.append("_").append(fieldName).append(" == true");
-
+
for (QueryTerm t : qt) {
// Find the location of this term in the query
StringBuilder predicate = new StringBuilder();
int start = 0;
- if (!t.getOperator().equals(JexlOperatorConstants.getOperator(ParserTreeConstants.JJTFUNCTIONNODE))) {
- predicate.append(fieldName).append(" ").append(t.getOperator()).append(" ").append(t.getValue());
+ if (!t.getOperator()
+ .equals(JexlOperatorConstants.getOperator(ParserTreeConstants.JJTFUNCTIONNODE))) {
+ predicate.append(fieldName).append(" ").append(t.getOperator()).append(" ")
+ .append(t.getValue());
start = query.indexOf(predicate.toString());
} else {
predicate.append(t.getValue().toString());
@@ -183,37 +187,36 @@ public class QueryEvaluator {
start = query.indexOf(predicate.toString());
}
if (-1 == start) {
- log.warn("Unable to find predicate: " + predicate.toString() + " in rewritten query: " + query.toString());
+ log.warn("Unable to find predicate: " + predicate.toString() + " in rewritten query: "
+ + query.toString());
}
int length = predicate.length();
-
+
// Now modify the query to check the value of my.fieldName
query.replace(start, start + length, newPredicate.toString());
}
-
+
if (log.isDebugEnabled()) {
log.debug("leaving rewriteQuery with: " + query.toString());
}
return query;
}
-
+
/**
* Evaluates the query against an event.
- *
- * @param eventFields
*/
public boolean evaluate(EventFields eventFields) {
-
+
this.modifiedQuery = null;
boolean rewritten = false;
-
+
// Copy the query
StringBuilder q = new StringBuilder(query);
// Copy the literals, we are going to remove elements from this set
// when they are added to the JEXL context. This will allow us to
// determine which items in the query where *NOT* in the data.
- HashSet<String> literalsCopy = new HashSet<String>(literals);
-
+ HashSet<String> literalsCopy = new HashSet<>(literals);
+
// Loop through the event fields and add them to the JexlContext.
for (Entry<String,Collection<FieldValue>> field : eventFields.asMap().entrySet()) {
String fName = field.getKey();
@@ -226,52 +229,55 @@ public class QueryEvaluator {
} else {
literalsCopy.remove(fName);
}
-
+
// This field may have multiple values.
if (field.getValue().size() == 0) {
continue;
} else if (field.getValue().size() == 1) {
// We are explicitly converting these bytes to a String.
if (caseInsensitive) {
- ctx.set(field.getKey().toLowerCase(), (new String(field.getValue().iterator().next().getValue())).toLowerCase());
+ ctx.set(field.getKey().toLowerCase(),
+ (new String(field.getValue().iterator().next().getValue())).toLowerCase());
} else {
ctx.set(field.getKey(), new String(field.getValue().iterator().next().getValue()));
}
-
+
} else {
// q = queryRewrite(q, field.getKey(), field.getValue());
q = rewriteQuery(q, field.getKey(), field.getValue());
rewritten = true;
- }// End of if
-
- }// End of loop
-
+ } // End of if
+
+ } // End of loop
+
// For any literals in the query that were not found in the data, add them to the context
// with a null value.
for (String lit : literalsCopy) {
ctx.set(lit, null);
}
-
+
if (log.isDebugEnabled()) {
log.debug("Evaluating query: " + q.toString());
}
-
+
this.modifiedQuery = q.toString();
-
+
Boolean result = null;
if (rewritten) {
Script script = engine.createScript(this.modifiedQuery);
try {
result = (Boolean) script.execute(ctx);
} catch (Exception e) {
- log.error("Error evaluating script: " + this.modifiedQuery + " against event" + eventFields.toString(), e);
+ log.error("Error evaluating script: " + this.modifiedQuery + " against event"
+ + eventFields.toString(), e);
}
} else {
Expression expr = engine.createExpression(this.modifiedQuery);
try {
result = (Boolean) expr.evaluate(ctx);
} catch (Exception e) {
- log.error("Error evaluating expression: " + this.modifiedQuery + " against event" + eventFields.toString(), e);
+ log.error("Error evaluating expression: " + this.modifiedQuery + " against event"
+ + eventFields.toString(), e);
}
}
if (null != result && result) {
@@ -280,9 +286,9 @@ public class QueryEvaluator {
return false;
}
} // End of method
-
+
/**
- *
+ *
* @return rewritten query that was evaluated against the most recent event
*/
public String getModifiedQuery() {
diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/RangeCalculator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/RangeCalculator.java
index 8a5474b..105351d 100644
--- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/RangeCalculator.java
+++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/RangeCalculator.java
@@ -16,7 +16,6 @@
*/
package org.apache.accumulo.examples.wikisearch.parser;
-
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
@@ -62,72 +61,77 @@ import com.google.common.collect.Multimap;
import com.google.protobuf.InvalidProtocolBufferException;
/**
- * This class is used to query the global indices to determine that set of ranges to use when querying the shard table. The RangeCalculator looks at each term
- * in the query to determine if it is a equivalence, range, or wildcard comparison, and queries the appropriate index to find the ranges for the terms which are
- * then cached. The final set of ranges is computed as the AST is traversed.
+ * This class is used to query the global indices to determine the set of ranges to use when
+ * querying the shard table. The RangeCalculator looks at each term in the query to determine if it
+ * is a equivalence, range, or wildcard comparison, and queries the appropriate index to find the
+ * ranges for the terms which are then cached. The final set of ranges is computed as the AST is
+ * traversed.
*/
public class RangeCalculator extends QueryParser {
-
+
/**
* Container used as map keys in this class
- *
+ *
*/
public static class MapKey implements Comparable<MapKey> {
private String fieldName = null;
private String fieldValue = null;
private String originalQueryValue = null;
-
+
public MapKey(String fieldName, String fieldValue) {
super();
this.fieldName = fieldName;
this.fieldValue = fieldValue;
}
-
+
public String getFieldName() {
return fieldName;
}
-
+
public String getFieldValue() {
return fieldValue;
}
-
+
public void setFieldName(String fieldName) {
this.fieldName = fieldName;
}
-
+
public void setFieldValue(String fieldValue) {
this.fieldValue = fieldValue;
}
-
+
public String getOriginalQueryValue() {
return originalQueryValue;
}
-
+
public void setOriginalQueryValue(String originalQueryValue) {
this.originalQueryValue = originalQueryValue;
}
-
+
@Override
public int hashCode() {
return new HashCodeBuilder(17, 37).append(fieldName).append(fieldValue).toHashCode();
}
-
+
@Override
public String toString() {
return this.fieldName + " " + this.fieldValue;
}
-
+
@Override
public boolean equals(Object other) {
- if (other == null)
+ if (other == null) {
return false;
+ }
if (other instanceof MapKey) {
MapKey o = (MapKey) other;
return (this.fieldName.equals(o.fieldName) && this.fieldValue.equals(o.fieldValue));
- } else
+ } else {
return false;
+ }
}
-
+
+ @Override
public int compareTo(MapKey o) {
int result = this.fieldName.compareTo(o.fieldName);
if (result != 0) {
@@ -136,87 +140,88 @@ public class RangeCalculator extends QueryParser {
return result;
}
}
-
+
}
-
+
/**
* Container used to hold the lower and upper bound of a range
- *
+ *
*/
public static class RangeBounds {
private String originalLower = null;
private Text lower = null;
private String originalUpper = null;
private Text upper = null;
-
+
public Text getLower() {
return lower;
}
-
+
public Text getUpper() {
return upper;
}
-
+
public void setLower(Text lower) {
this.lower = lower;
}
-
+
public void setUpper(Text upper) {
this.upper = upper;
}
-
+
public String getOriginalLower() {
return originalLower;
}
-
+
public String getOriginalUpper() {
return originalUpper;
}
-
+
public void setOriginalLower(String originalLower) {
this.originalLower = originalLower;
}
-
+
public void setOriginalUpper(String originalUpper) {
this.originalUpper = originalUpper;
}
}
-
+
/**
- *
- * Object that is used to hold ranges found in the index. Subclasses may compute the final range set in various ways.
+ *
+ * Object that is used to hold ranges found in the index. Subclasses may compute the final range
+ * set in various ways.
*/
protected static class TermRange implements Comparable<TermRange> {
-
+
private String fieldName = null;
private Object fieldValue = null;
- private Set<Range> ranges = new TreeSet<Range>();
-
+ private Set<Range> ranges = new TreeSet<>();
+
public TermRange(String name, Object fieldValue) {
this.fieldName = name;
this.fieldValue = fieldValue;
}
-
+
public String getFieldName() {
return this.fieldName;
}
-
+
public Object getFieldValue() {
return this.fieldValue;
}
-
+
public void addAll(Set<Range> r) {
ranges.addAll(r);
}
-
+
public void add(Range r) {
ranges.add(r);
}
-
+
public Set<Range> getRanges() {
return ranges;
}
-
+
@Override
public String toString() {
ToStringBuilder tsb = new ToStringBuilder(this);
@@ -225,7 +230,8 @@ public class RangeCalculator extends QueryParser {
tsb.append("ranges", ranges);
return tsb.toString();
}
-
+
+ @Override
public int compareTo(TermRange o) {
int result = this.fieldName.compareTo(o.fieldName);
if (result == 0) {
@@ -235,7 +241,7 @@ public class RangeCalculator extends QueryParser {
}
}
}
-
+
/**
* Object used to store context information as the AST is being traversed.
*/
@@ -246,11 +252,11 @@ public class RangeCalculator extends QueryParser {
TermRange lastRange = null;
String lastProcessedTerm = null;
}
-
+
protected static Logger log = Logger.getLogger(RangeCalculator.class);
private static String WILDCARD = ".*";
private static String SINGLE_WILDCARD = "\\.";
-
+
protected Connector c;
protected Authorizations auths;
protected Multimap<String,Normalizer> indexedTerms;
@@ -258,33 +264,23 @@ public class RangeCalculator extends QueryParser {
protected String indexTableName;
protected String reverseIndexTableName;
protected int queryThreads = 8;
-
+
/* final results of index lookups, ranges for the shard table */
protected Set<Range> result = null;
/* map of field names to values found in the index */
protected Multimap<String,String> indexEntries = HashMultimap.create();
/* map of value in the index to the original query values */
- protected Map<String,String> indexValues = new HashMap<String,String>();
+ protected Map<String,String> indexValues = new HashMap<>();
/* map of values in the query to map keys used */
protected Multimap<String,MapKey> originalQueryValues = HashMultimap.create();
/* map of field name to cardinality */
- protected Map<String,Long> termCardinalities = new HashMap<String,Long>();
+ protected Map<String,Long> termCardinalities = new HashMap<>();
/* cached results of all ranges found global index lookups */
- protected Map<MapKey,TermRange> globalIndexResults = new HashMap<MapKey,TermRange>();
-
- /**
- *
- * @param c
- * @param auths
- * @param indexedTerms
- * @param terms
- * @param query
- * @param logic
- * @param typeFilter
- * @throws ParseException
- */
- public void execute(Connector c, Authorizations auths, Multimap<String,Normalizer> indexedTerms, Multimap<String,QueryTerm> terms, String query,
- AbstractQueryLogic logic, Set<String> typeFilter) throws ParseException {
+ protected Map<MapKey,TermRange> globalIndexResults = new HashMap<>();
+
+ public void execute(Connector c, Authorizations auths, Multimap<String,Normalizer> indexedTerms,
+ Multimap<String,QueryTerm> terms, String query, AbstractQueryLogic logic,
+ Set<String> typeFilter) throws ParseException {
super.execute(query);
this.c = c;
this.auths = auths;
@@ -293,21 +289,26 @@ public class RangeCalculator extends QueryParser {
this.indexTableName = logic.getIndexTableName();
this.reverseIndexTableName = logic.getReverseIndexTableName();
this.queryThreads = logic.getQueryThreads();
-
- Map<MapKey,Set<Range>> indexRanges = new HashMap<MapKey,Set<Range>>();
- Map<MapKey,Set<Range>> trailingWildcardRanges = new HashMap<MapKey,Set<Range>>();
- Map<MapKey,Set<Range>> leadingWildcardRanges = new HashMap<MapKey,Set<Range>>();
- Map<Text,RangeBounds> rangeMap = new HashMap<Text,RangeBounds>();
-
+
+ Map<MapKey,Set<Range>> indexRanges = new HashMap<>();
+ Map<MapKey,Set<Range>> trailingWildcardRanges = new HashMap<>();
+ Map<MapKey,Set<Range>> leadingWildcardRanges = new HashMap<>();
+ Map<Text,RangeBounds> rangeMap = new HashMap<>();
+
// Here we iterate over all of the terms in the query to determine if they are an equivalence,
// wildcard, or range type operator
for (Entry<String,QueryTerm> entry : terms.entries()) {
if (entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTEQNode.class))
- || entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTERNode.class))
- || entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTLTNode.class))
- || entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTLENode.class))
- || entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTGTNode.class))
- || entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTGENode.class))) {
+ || entry.getValue().getOperator()
+ .equals(JexlOperatorConstants.getOperator(ASTERNode.class))
+ || entry.getValue().getOperator()
+ .equals(JexlOperatorConstants.getOperator(ASTLTNode.class))
+ || entry.getValue().getOperator()
+ .equals(JexlOperatorConstants.getOperator(ASTLENode.class))
+ || entry.getValue().getOperator()
+ .equals(JexlOperatorConstants.getOperator(ASTGTNode.class))
+ || entry.getValue().getOperator()
+ .equals(JexlOperatorConstants.getOperator(ASTGENode.class))) {
// If this term is not in the set of indexed terms, then bail
if (!indexedTerms.containsKey(entry.getKey())) {
termCardinalities.put(entry.getKey().toUpperCase(), 0L);
@@ -319,67 +320,81 @@ public class RangeCalculator extends QueryParser {
continue;
}
// In the case where we are looking for 'null', then skip.
- if (null == entry.getValue().getValue() || ((String) entry.getValue().getValue()).equals("null")) {
+ if (null == entry.getValue().getValue()
+ || ((String) entry.getValue().getValue()).equals("null")) {
termCardinalities.put(entry.getKey().toUpperCase(), 0L);
continue;
}
-
+
// Remove the begin and end ' marks
String value = null;
- if (((String) entry.getValue().getValue()).startsWith("'") && ((String) entry.getValue().getValue()).endsWith("'"))
- value = ((String) entry.getValue().getValue()).substring(1, ((String) entry.getValue().getValue()).length() - 1);
- else
+ if (((String) entry.getValue().getValue()).startsWith("'")
+ && ((String) entry.getValue().getValue()).endsWith("'")) {
+ value = ((String) entry.getValue().getValue()).substring(1,
+ ((String) entry.getValue().getValue()).length() - 1);
+ } else {
value = (String) entry.getValue().getValue();
+ }
// The entries in the index are normalized
for (Normalizer normalizer : indexedTerms.get(entry.getKey())) {
String normalizedFieldValue = normalizer.normalizeFieldValue(null, value);
Text fieldValue = new Text(normalizedFieldValue);
Text fieldName = new Text(entry.getKey().toUpperCase());
-
+
// EQUALS
- if (entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTEQNode.class))) {
+ if (entry.getValue().getOperator()
+ .equals(JexlOperatorConstants.getOperator(ASTEQNode.class))) {
Key startRange = new Key(fieldValue, fieldName);
Range r = new Range(startRange, true, startRange.followingKey(PartialKey.ROW), true);
-
+
MapKey key = new MapKey(fieldName.toString(), fieldValue.toString());
key.setOriginalQueryValue(value);
this.originalQueryValues.put(value, key);
- if (!indexRanges.containsKey(key))
+ if (!indexRanges.containsKey(key)) {
indexRanges.put(key, new HashSet<Range>());
+ }
indexRanges.get(key).add(r);
// WILDCARD
- } else if (entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTERNode.class))) {
- // This is a wildcard query using regex. We can only support leading and trailing wildcards at this time. Leading
- // wildcards will need be reversed and sent to the global reverse index. Trailing wildcard queries will be sent to the
- // global index. In all cases, the range for the wilcard will be the range of possible UNICODE codepoints, hex 0 to 10FFFF.
+ } else if (entry.getValue().getOperator()
+ .equals(JexlOperatorConstants.getOperator(ASTERNode.class))) {
+ // This is a wildcard query using regex. We can only support leading and trailing
+ // wildcards at this time. Leading
+ // wildcards will need to be reversed and sent to the global reverse index. Trailing
+ // wildcard queries will be sent to the
+ // global index. In all cases, the range for the wildcard will be the range of possible
+ // UNICODE codepoints, hex 0 to 10FFFF.
int loc = normalizedFieldValue.indexOf(WILDCARD);
- if (-1 == loc)
+ if (-1 == loc) {
loc = normalizedFieldValue.indexOf(SINGLE_WILDCARD);
+ }
if (-1 == loc) {
// Then no wildcard in the query? Treat like the equals case above.
Key startRange = new Key(fieldValue, fieldName);
Range r = new Range(startRange, true, startRange.followingKey(PartialKey.ROW), true);
-
+
MapKey key = new MapKey(fieldName.toString(), fieldValue.toString());
key.setOriginalQueryValue(value);
this.originalQueryValues.put(value, key);
- if (!indexRanges.containsKey(key))
+ if (!indexRanges.containsKey(key)) {
indexRanges.put(key, new HashSet<Range>());
+ }
indexRanges.get(key).add(r);
} else {
if (loc == 0) {
- // Then we have a leading wildcard, reverse the term and use the global reverse index.
+ // Then we have a leading wildcard, reverse the term and use the global reverse
+ // index.
StringBuilder buf = new StringBuilder(normalizedFieldValue.substring(2));
normalizedFieldValue = buf.reverse().toString();
Key startRange = new Key(new Text(normalizedFieldValue + "\u0000"), fieldName);
Key endRange = new Key(new Text(normalizedFieldValue + "\u10FFFF"), fieldName);
Range r = new Range(startRange, true, endRange, true);
-
+
MapKey key = new MapKey(fieldName.toString(), normalizedFieldValue);
key.setOriginalQueryValue(value);
this.originalQueryValues.put(value, key);
- if (!leadingWildcardRanges.containsKey(key))
+ if (!leadingWildcardRanges.containsKey(key)) {
leadingWildcardRanges.put(key, new HashSet<Range>());
+ }
leadingWildcardRanges.get(key).add(r);
} else if (loc == (normalizedFieldValue.length() - 2)) {
normalizedFieldValue = normalizedFieldValue.substring(0, loc);
@@ -387,39 +402,48 @@ public class RangeCalculator extends QueryParser {
Key startRange = new Key(new Text(normalizedFieldValue + "\u0000"), fieldName);
Key endRange = new Key(new Text(normalizedFieldValue + "\u10FFFF"), fieldName);
Range r = new Range(startRange, true, endRange, true);
-
+
MapKey key = new MapKey(fieldName.toString(), normalizedFieldValue);
key.setOriginalQueryValue(value);
this.originalQueryValues.put(value, key);
- if (!trailingWildcardRanges.containsKey(key))
+ if (!trailingWildcardRanges.containsKey(key)) {
trailingWildcardRanges.put(key, new HashSet<Range>());
+ }
trailingWildcardRanges.get(key).add(r);
} else {
- // throw new RuntimeException("Unsupported wildcard location. Only trailing or leading wildcards are supported: " + normalizedFieldValue);
- // Don't throw an exception, there must be a wildcard in the query, we'll treat it as a filter on the results since it is not
+ // throw new RuntimeException("Unsupported wildcard location. Only trailing or
+ // leading wildcards are supported: " + normalizedFieldValue);
+ // Don't throw an exception, there must be a wildcard in the query, we'll treat it
+ // as a filter on the results since it is not
// leading or trailing.
}
}
// RANGES
- } else if (entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTGTNode.class))
- || entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTGENode.class))) {
+ } else if (entry.getValue().getOperator()
+ .equals(JexlOperatorConstants.getOperator(ASTGTNode.class))
+ || entry.getValue().getOperator()
+ .equals(JexlOperatorConstants.getOperator(ASTGENode.class))) {
// Then we have a lower bound to a range query
- if (!rangeMap.containsKey(fieldName))
+ if (!rangeMap.containsKey(fieldName)) {
rangeMap.put(fieldName, new RangeBounds());
+ }
rangeMap.get(fieldName).setLower(fieldValue);
rangeMap.get(fieldName).setOriginalLower(value);
- } else if (entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTLTNode.class))
- || entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTLENode.class))) {
+ } else if (entry.getValue().getOperator()
+ .equals(JexlOperatorConstants.getOperator(ASTLTNode.class))
+ || entry.getValue().getOperator()
+ .equals(JexlOperatorConstants.getOperator(ASTLENode.class))) {
// Then we have an upper bound to a range query
- if (!rangeMap.containsKey(fieldName))
+ if (!rangeMap.containsKey(fieldName)) {
rangeMap.put(fieldName, new RangeBounds());
+ }
rangeMap.get(fieldName).setUpper(fieldValue);
rangeMap.get(fieldName).setOriginalUpper(value);
}
}
}
}
-
+
// INDEX RANGE QUERY
// Now that we have figured out the range bounds, create the index ranges.
for (Entry<Text,RangeBounds> entry : rangeMap.entrySet()) {
@@ -437,156 +461,169 @@ public class RangeCalculator extends QueryParser {
Key startRange = new Key(lower, entry.getKey());
Key endRange = new Key(upper, entry.getKey());
Range r = new Range(startRange, true, endRange, true);
- // For the range queries we need to query the global index and then handle the results a little differently.
- Map<MapKey,Set<Range>> ranges = new HashMap<MapKey,Set<Range>>();
+ // For the range queries we need to query the global index and then handle the results a
+ // little differently.
+ Map<MapKey,Set<Range>> ranges = new HashMap<>();
MapKey key = new MapKey(entry.getKey().toString(), entry.getValue().getLower().toString());
key.setOriginalQueryValue(entry.getValue().getOriginalLower().toString());
this.originalQueryValues.put(entry.getValue().getOriginalLower().toString(), key);
ranges.put(key, new HashSet<Range>());
ranges.get(key).add(r);
-
+
// Now query the global index and override the field value used in the results map
try {
- Map<MapKey,TermRange> lowerResults = queryGlobalIndex(ranges, entry.getKey().toString(), this.indexTableName, false, key, typeFilter);
+ Map<MapKey,TermRange> lowerResults = queryGlobalIndex(ranges, entry.getKey().toString(),
+ this.indexTableName, false, key, typeFilter);
// Add the results to the global index results for both the upper and lower field values.
- Map<MapKey,TermRange> upperResults = new HashMap<MapKey,TermRange>();
+ Map<MapKey,TermRange> upperResults = new HashMap<>();
for (Entry<MapKey,TermRange> e : lowerResults.entrySet()) {
- MapKey key2 = new MapKey(e.getKey().getFieldName(), entry.getValue().getUpper().toString());
+ MapKey key2 =
+ new MapKey(e.getKey().getFieldName(), entry.getValue().getUpper().toString());
key2.setOriginalQueryValue(entry.getValue().getOriginalUpper().toString());
upperResults.put(key2, e.getValue());
this.originalQueryValues.put(entry.getValue().getOriginalUpper(), key2);
-
+
}
-
+
this.globalIndexResults.putAll(lowerResults);
this.globalIndexResults.putAll(upperResults);
-
+
} catch (TableNotFoundException e) {
log.error("index table not found", e);
throw new RuntimeException(" index table not found", e);
}
} else {
- log.warn("Unbounded range detected, not querying index for it. Field " + entry.getKey().toString() + " in query: " + query);
+ log.warn("Unbounded range detected, not querying index for it. Field "
+ + entry.getKey().toString() + " in query: " + query);
}
}
// Now that we have calculated all of the ranges, query the global index.
try {
-
+
// Query for the trailing wildcards if we have any
for (Entry<MapKey,Set<Range>> trailing : trailingWildcardRanges.entrySet()) {
- Map<MapKey,Set<Range>> m = new HashMap<MapKey,Set<Range>>();
+ Map<MapKey,Set<Range>> m = new HashMap<>();
m.put(trailing.getKey(), trailing.getValue());
- if (log.isDebugEnabled())
+ if (log.isDebugEnabled()) {
log.debug("Ranges for Wildcard Global Index query: " + m.toString());
- this.globalIndexResults.putAll(queryGlobalIndex(m, trailing.getKey().getFieldName(), this.indexTableName, false, trailing.getKey(), typeFilter));
+ }
+ this.globalIndexResults.putAll(queryGlobalIndex(m, trailing.getKey().getFieldName(),
+ this.indexTableName, false, trailing.getKey(), typeFilter));
}
-
+
// Query for the leading wildcards if we have any
for (Entry<MapKey,Set<Range>> leading : leadingWildcardRanges.entrySet()) {
- Map<MapKey,Set<Range>> m = new HashMap<MapKey,Set<Range>>();
+ Map<MapKey,Set<Range>> m = new HashMap<>();
m.put(leading.getKey(), leading.getValue());
- if (log.isDebugEnabled())
+ if (log.isDebugEnabled()) {
log.debug("Ranges for Wildcard Global Reverse Index query: " + m.toString());
- this.globalIndexResults.putAll(queryGlobalIndex(m, leading.getKey().getFieldName(), this.reverseIndexTableName, true, leading.getKey(), typeFilter));
+ }
+ this.globalIndexResults.putAll(queryGlobalIndex(m, leading.getKey().getFieldName(),
+ this.reverseIndexTableName, true, leading.getKey(), typeFilter));
}
-
+
// Query for the equals case
for (Entry<MapKey,Set<Range>> equals : indexRanges.entrySet()) {
- Map<MapKey,Set<Range>> m = new HashMap<MapKey,Set<Range>>();
+ Map<MapKey,Set<Range>> m = new HashMap<>();
m.put(equals.getKey(), equals.getValue());
- if (log.isDebugEnabled())
+ if (log.isDebugEnabled()) {
log.debug("Ranges for Global Index query: " + m.toString());
- this.globalIndexResults.putAll(queryGlobalIndex(m, equals.getKey().getFieldName(), this.indexTableName, false, equals.getKey(), typeFilter));
+ }
+ this.globalIndexResults.putAll(queryGlobalIndex(m, equals.getKey().getFieldName(),
+ this.indexTableName, false, equals.getKey(), typeFilter));
}
} catch (TableNotFoundException e) {
log.error("index table not found", e);
throw new RuntimeException(" index table not found", e);
}
-
- if (log.isDebugEnabled())
+
+ if (log.isDebugEnabled()) {
log.debug("Ranges from Global Index query: " + globalIndexResults.toString());
-
+ }
+
// Now traverse the AST
EvaluationContext ctx = new EvaluationContext();
this.getAST().childrenAccept(this, ctx);
-
+
if (ctx.lastRange.getRanges().size() == 0) {
log.debug("No resulting range set");
} else {
- if (log.isDebugEnabled())
+ if (log.isDebugEnabled()) {
log.debug("Setting range results to: " + ctx.lastRange.getRanges().toString());
+ }
this.result = ctx.lastRange.getRanges();
}
}
-
+
/**
- *
+ *
* @return set of ranges to use for the shard table
*/
public Set<Range> getResult() {
return result;
}
-
+
/**
- *
+ *
* @return map of field names to index field values
*/
public Multimap<String,String> getIndexEntries() {
return indexEntries;
}
-
+
public Map<String,String> getIndexValues() {
return indexValues;
}
-
+
/**
- *
+ *
* @return Cardinality for each field name.
*/
public Map<String,Long> getTermCardinalities() {
return termCardinalities;
}
-
+
/**
- *
- * @param indexRanges
- * @param tableName
+ *
* @param isReverse
* switch that determines whether or not to reverse the results
* @param override
* mapKey for wildcard and range queries that specify which mapkey to use in the results
* @param typeFilter
* - optional list of datatypes
- * @throws TableNotFoundException
*/
- protected Map<MapKey,TermRange> queryGlobalIndex(Map<MapKey,Set<Range>> indexRanges, String specificFieldName, String tableName, boolean isReverse,
- MapKey override, Set<String> typeFilter) throws TableNotFoundException {
-
+ protected Map<MapKey,TermRange> queryGlobalIndex(Map<MapKey,Set<Range>> indexRanges,
+ String specificFieldName, String tableName, boolean isReverse, MapKey override,
+ Set<String> typeFilter) throws TableNotFoundException {
+
// The results map where the key is the field name and field value and the
// value is a set of ranges. The mapkey will always be the field name
// and field value that was passed in the original query. The TermRange
// will contain the field name and field value found in the index.
- Map<MapKey,TermRange> results = new HashMap<MapKey,TermRange>();
-
+ Map<MapKey,TermRange> results = new HashMap<>();
+
// Seed the results map and create the range set for the batch scanner
- Set<Range> rangeSuperSet = new HashSet<Range>();
+ Set<Range> rangeSuperSet = new HashSet<>();
for (Entry<MapKey,Set<Range>> entry : indexRanges.entrySet()) {
rangeSuperSet.addAll(entry.getValue());
TermRange tr = new TermRange(entry.getKey().getFieldName(), entry.getKey().getFieldValue());
- if (null == override)
+ if (null == override) {
results.put(entry.getKey(), tr);
- else
+ } else {
results.put(override, tr);
+ }
+ }
+
+ if (log.isDebugEnabled()) {
+ log.debug("Querying global index table: " + tableName + ", range: " + rangeSuperSet.toString()
+ + " colf: " + specificFieldName);
}
-
- if (log.isDebugEnabled())
- log.debug("Querying global index table: " + tableName + ", range: " + rangeSuperSet.toString() + " colf: " + specificFieldName);
BatchScanner bs = this.c.createBatchScanner(tableName, this.auths, this.queryThreads);
bs.setRanges(rangeSuperSet);
if (null != specificFieldName) {
bs.fetchColumnFamily(new Text(specificFieldName));
}
-
+
for (Entry<Key,Value> entry : bs) {
if (log.isDebugEnabled()) {
log.debug("Index entry: " + entry.getKey().toString());
@@ -598,7 +635,7 @@ public class RangeCalculator extends QueryParser {
StringBuilder buf = new StringBuilder(entry.getKey().getRow().toString());
fieldValue = buf.reverse().toString();
}
-
+
String fieldName = entry.getKey().getColumnFamily().toString();
// Get the shard id and datatype from the colq
String colq = entry.getKey().getColumnQualifier().toString();
@@ -612,8 +649,9 @@ public class RangeCalculator extends QueryParser {
shardId = colq;
}
// Skip this entry if the type is not correct
- if (null != datatype && null != typeFilter && !typeFilter.contains(datatype))
+ if (null != datatype && null != typeFilter && !typeFilter.contains(datatype)) {
continue;
+ }
// Parse the UID.List object from the value
Uid.List uidList = null;
try {
@@ -622,7 +660,7 @@ public class RangeCalculator extends QueryParser {
// Don't add UID information, at least we know what shards
// it is located in.
}
-
+
// Add the count for this shard to the total count for the term.
long count = 0;
Long storedCount = termCardinalities.get(fieldName);
@@ -633,39 +671,43 @@ public class RangeCalculator extends QueryParser {
}
termCardinalities.put(fieldName, count);
this.indexEntries.put(fieldName, fieldValue);
-
- if (null == override)
+
+ if (null == override) {
this.indexValues.put(fieldValue, fieldValue);
- else
+ } else {
this.indexValues.put(fieldValue, override.getOriginalQueryValue());
-
+ }
+
// Create the keys
Text shard = new Text(shardId);
if (uidList.getIGNORE()) {
// Then we create a scan range that is the entire shard
- if (null == override)
+ if (null == override) {
results.get(new MapKey(fieldName, fieldValue)).add(new Range(shard));
- else
+ } else {
results.get(override).add(new Range(shard));
+ }
} else {
// We should have UUIDs, create event ranges
for (String uuid : uidList.getUIDList()) {
Text cf = new Text(datatype);
TextUtil.textAppend(cf, uuid);
Key startKey = new Key(shard, cf);
- Key endKey = new Key(shard, new Text(cf.toString() + EvaluatingIterator.NULL_BYTE_STRING));
+ Key endKey =
+ new Key(shard, new Text(cf.toString() + EvaluatingIterator.NULL_BYTE_STRING));
Range eventRange = new Range(startKey, true, endKey, false);
- if (null == override)
+ if (null == override) {
results.get(new MapKey(fieldName, fieldValue)).add(eventRange);
- else
+ } else {
results.get(override).add(eventRange);
+ }
}
}
}
bs.close();
return results;
}
-
+
@Override
public Object visit(ASTOrNode node, Object data) {
boolean previouslyInOrContext = false;
@@ -680,31 +722,38 @@ public class RangeCalculator extends QueryParser {
// Process both sides of this node. Left branch first
node.jjtGetChild(0).jjtAccept(this, ctx);
Long leftCardinality = this.termCardinalities.get(ctx.lastProcessedTerm);
- if (null == leftCardinality)
+ if (null == leftCardinality) {
leftCardinality = 0L;
+ }
TermRange leftRange = ctx.lastRange;
- if (log.isDebugEnabled())
- log.debug("[OR-left] term: " + ctx.lastProcessedTerm + ", cardinality: " + leftCardinality + ", ranges: " + leftRange.getRanges().size());
-
+ if (log.isDebugEnabled()) {
+ log.debug("[OR-left] term: " + ctx.lastProcessedTerm + ", cardinality: " + leftCardinality
+ + ", ranges: " + leftRange.getRanges().size());
+ }
+
// Process the right branch
node.jjtGetChild(1).jjtAccept(this, ctx);
Long rightCardinality = this.termCardinalities.get(ctx.lastProcessedTerm);
- if (null == rightCardinality)
+ if (null == rightCardinality) {
rightCardinality = 0L;
+ }
TermRange rightRange = ctx.lastRange;
- if (log.isDebugEnabled())
- log.debug("[OR-right] term: " + ctx.lastProcessedTerm + ", cardinality: " + rightCardinality + ", ranges: " + rightRange.getRanges().size());
-
+ if (log.isDebugEnabled()) {
+ log.debug("[OR-right] term: " + ctx.lastProcessedTerm + ", cardinality: " + rightCardinality
+ + ", ranges: " + rightRange.getRanges().size());
+ }
+
// reset the state
- if (null != data && !previouslyInOrContext)
+ if (null != data && !previouslyInOrContext) {
ctx.inOrContext = false;
+ }
// Add the ranges for the left and right branches to a TreeSet to sort them
- Set<Range> ranges = new TreeSet<Range>();
+ Set<Range> ranges = new TreeSet<>();
ranges.addAll(leftRange.getRanges());
ranges.addAll(rightRange.getRanges());
// Now create the union set
- Set<Text> shardsAdded = new HashSet<Text>();
- Set<Range> returnSet = new HashSet<Range>();
+ Set<Text> shardsAdded = new HashSet<>();
+ Set<Range> returnSet = new HashSet<>();
for (Range r : ranges) {
if (!shardsAdded.contains(r.getStartKey().getRow())) {
// Only add ranges with a start key for the entire shard.
@@ -714,21 +763,23 @@ public class RangeCalculator extends QueryParser {
returnSet.add(r);
} else {
// if (log.isTraceEnabled())
- log.info("Skipping event specific range: " + r.toString() + " because shard range has already been added: "
+ log.info("Skipping event specific range: " + r.toString()
+ + " because shard range has already been added: "
+ shardsAdded.contains(r.getStartKey().getRow()));
}
}
// Clear the ranges from the context and add the result in its place
TermRange orRange = new TermRange("OR_RESULT", "foo");
orRange.addAll(returnSet);
- if (log.isDebugEnabled())
+ if (log.isDebugEnabled()) {
log.debug("[OR] results: " + orRange.getRanges().toString());
+ }
ctx.lastRange = orRange;
ctx.lastProcessedTerm = "OR_RESULT";
this.termCardinalities.put("OR_RESULT", (leftCardinality + rightCardinality));
return null;
}
-
+
@Override
public Object visit(ASTAndNode node, Object data) {
boolean previouslyInAndContext = false;
@@ -744,44 +795,53 @@ public class RangeCalculator extends QueryParser {
node.jjtGetChild(0).jjtAccept(this, ctx);
String leftTerm = ctx.lastProcessedTerm;
Long leftCardinality = this.termCardinalities.get(leftTerm);
- if (null == leftCardinality)
+ if (null == leftCardinality) {
leftCardinality = 0L;
+ }
TermRange leftRange = ctx.lastRange;
- if (log.isDebugEnabled())
- log.debug("[AND-left] term: " + ctx.lastProcessedTerm + ", cardinality: " + leftCardinality + ", ranges: " + leftRange.getRanges().size());
-
+ if (log.isDebugEnabled()) {
+ log.debug("[AND-left] term: " + ctx.lastProcessedTerm + ", cardinality: " + leftCardinality
+ + ", ranges: " + leftRange.getRanges().size());
+ }
+
// Process the right branch
node.jjtGetChild(1).jjtAccept(this, ctx);
String rightTerm = ctx.lastProcessedTerm;
Long rightCardinality = this.termCardinalities.get(rightTerm);
- if (null == rightCardinality)
+ if (null == rightCardinality) {
rightCardinality = 0L;
+ }
TermRange rightRange = ctx.lastRange;
- if (log.isDebugEnabled())
- log.debug("[AND-right] term: " + ctx.lastProcessedTerm + ", cardinality: " + rightCardinality + ", ranges: " + rightRange.getRanges().size());
-
+ if (log.isDebugEnabled()) {
+ log.debug("[AND-right] term: " + ctx.lastProcessedTerm + ", cardinality: " + rightCardinality
+ + ", ranges: " + rightRange.getRanges().size());
+ }
+
// reset the state
- if (null != data && !previouslyInAndContext)
+ if (null != data && !previouslyInAndContext) {
ctx.inAndContext = false;
-
+ }
+
long card = 0L;
TermRange andRange = new TermRange("AND_RESULT", "foo");
if ((leftCardinality > 0 && leftCardinality <= rightCardinality) || rightCardinality == 0) {
card = leftCardinality;
andRange.addAll(leftRange.getRanges());
- } else if ((rightCardinality > 0 && rightCardinality <= leftCardinality) || leftCardinality == 0) {
+ } else if ((rightCardinality > 0 && rightCardinality <= leftCardinality)
+ || leftCardinality == 0) {
card = rightCardinality;
andRange.addAll(rightRange.getRanges());
}
- if (log.isDebugEnabled())
+ if (log.isDebugEnabled()) {
log.debug("[AND] results: " + andRange.getRanges().toString());
+ }
ctx.lastRange = andRange;
ctx.lastProcessedTerm = "AND_RESULT";
this.termCardinalities.put("AND_RESULT", card);
-
+
return null;
}
-
+
@Override
public Object visit(ASTEQNode node, Object data) {
StringBuilder fieldName = new StringBuilder();
@@ -790,38 +850,44 @@ public class RangeCalculator extends QueryParser {
Object left = node.jjtGetChild(0).jjtAccept(this, data);
Object right = node.jjtGetChild(1).jjtAccept(this, data);
// Ignore functions in the query
- if (left instanceof FunctionResult || right instanceof FunctionResult)
+ if (left instanceof FunctionResult || right instanceof FunctionResult) {
return null;
+ }
decodeResults(left, right, fieldName, value);
// We need to check to see if we are in a NOT context. If so,
// then we need to reverse the negation.
boolean negated = false;
if (null != data && data instanceof EvaluationContext) {
EvaluationContext ctx = (EvaluationContext) data;
- if (ctx.inNotContext)
+ if (ctx.inNotContext) {
negated = !negated;
+ }
}
- QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()), value.getObject());
+ QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()),
+ value.getObject());
termsCopy.put(fieldName.toString(), term);
// Get the terms from the global index
// Remove the begin and end ' marks
String termValue = null;
- if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'"))
+ if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'")) {
termValue = ((String) term.getValue()).substring(1, ((String) term.getValue()).length() - 1);
- else
+ } else {
termValue = (String) term.getValue();
+ }
// Get the values found in the index for this query term
TermRange ranges = null;
for (MapKey key : this.originalQueryValues.get(termValue)) {
if (key.getFieldName().equalsIgnoreCase(fieldName.toString())) {
ranges = this.globalIndexResults.get(key);
- if (log.isDebugEnabled())
+ if (log.isDebugEnabled()) {
log.debug("Results for cached index ranges for key: " + key + " are " + ranges);
+ }
}
}
// If no result for this field name and value, then add empty range
- if (null == ranges)
- ranges = new TermRange(fieldName.toString(), (String) term.getValue());
+ if (null == ranges) {
+ ranges = new TermRange(fieldName.toString(), term.getValue());
+ }
if (null != data && data instanceof EvaluationContext) {
EvaluationContext ctx = (EvaluationContext) data;
ctx.lastRange = ranges;
@@ -829,7 +895,7 @@ public class RangeCalculator extends QueryParser {
}
return null;
}
-
+
@Override
public Object visit(ASTNENode node, Object data) {
StringBuilder fieldName = new StringBuilder();
@@ -838,20 +904,24 @@ public class RangeCalculator extends QueryParser {
Object left = node.jjtGetChild(0).jjtAccept(this, data);
Object right = node.jjtGetChild(1).jjtAccept(this, data);
// Ignore functions in the query
- if (left instanceof FunctionResult || right instanceof FunctionResult)
+ if (left instanceof FunctionResult || right instanceof FunctionResult) {
return null;
+ }
decodeResults(left, right, fieldName, value);
// We need to check to see if we are in a NOT context. If so,
// then we need to reverse the negation.
boolean negated = true;
if (null != data && data instanceof EvaluationContext) {
EvaluationContext ctx = (EvaluationContext) data;
- if (ctx.inNotContext)
+ if (ctx.inNotContext) {
negated = !negated;
+ }
}
- if (negated)
+ if (negated) {
negatedTerms.add(fieldName.toString());
- QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()), value.getObject());
+ }
+ QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()),
+ value.getObject());
termsCopy.put(fieldName.toString(), term);
// We can only use the global index for equality, put in fake results
if (null != data && data instanceof EvaluationContext) {
@@ -862,7 +932,7 @@ public class RangeCalculator extends QueryParser {
}
return null;
}
-
+
@Override
public Object visit(ASTLTNode node, Object data) {
StringBuilder fieldName = new StringBuilder();
@@ -871,38 +941,44 @@ public class RangeCalculator extends QueryParser {
Object left = node.jjtGetChild(0).jjtAccept(this, data);
Object right = node.jjtGetChild(1).jjtAccept(this, data);
// Ignore functions in the query
- if (left instanceof FunctionResult || right instanceof FunctionResult)
+ if (left instanceof FunctionResult || right instanceof FunctionResult) {
return null;
+ }
decodeResults(left, right, fieldName, value);
// We need to check to see if we are in a NOT context. If so,
// then we need to reverse the negation.
boolean negated = false;
if (null != data && data instanceof EvaluationContext) {
EvaluationContext ctx = (EvaluationContext) data;
- if (ctx.inNotContext)
+ if (ctx.inNotContext) {
negated = !negated;
+ }
}
- QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()), value.getObject());
+ QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()),
+ value.getObject());
termsCopy.put(fieldName.toString(), term);
// Get the terms from the global index
// Remove the begin and end ' marks
String termValue = null;
- if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'"))
+ if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'")) {
termValue = ((String) term.getValue()).substring(1, ((String) term.getValue()).length() - 1);
- else
+ } else {
termValue = (String) term.getValue();
+ }
// Get the values found in the index for this query term
TermRange ranges = null;
for (MapKey key : this.originalQueryValues.get(termValue)) {
if (key.getFieldName().equalsIgnoreCase(fieldName.toString())) {
ranges = this.globalIndexResults.get(key);
- if (log.isDebugEnabled())
+ if (log.isDebugEnabled()) {
log.debug("Results for cached index ranges for key: " + key + " are " + ranges);
+ }
}
}
// If no result for this field name and value, then add empty range
- if (null == ranges)
- ranges = new TermRange(fieldName.toString(), (String) term.getValue());
+ if (null == ranges) {
+ ranges = new TermRange(fieldName.toString(), term.getValue());
+ }
if (null != data && data instanceof EvaluationContext) {
EvaluationContext ctx = (EvaluationContext) data;
ctx.lastRange = ranges;
@@ -910,7 +986,7 @@ public class RangeCalculator extends QueryParser {
}
return null;
}
-
+
@Override
public Object visit(ASTGTNode node, Object data) {
StringBuilder fieldName = new StringBuilder();
@@ -919,38 +995,44 @@ public class RangeCalculator extends QueryParser {
Object left = node.jjtGetChild(0).jjtAccept(this, data);
Object right = node.jjtGetChild(1).jjtAccept(this, data);
// Ignore functions in the query
- if (left instanceof FunctionResult || right instanceof FunctionResult)
+ if (left instanceof FunctionResult || right instanceof FunctionResult) {
return null;
+ }
decodeResults(left, right, fieldName, value);
// We need to check to see if we are in a NOT context. If so,
// then we need to reverse the negation.
boolean negated = false;
if (null != data && data instanceof EvaluationContext) {
EvaluationContext ctx = (EvaluationContext) data;
- if (ctx.inNotContext)
+ if (ctx.inNotContext) {
negated = !negated;
+ }
}
- QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()), value.getObject());
+ QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()),
+ value.getObject());
termsCopy.put(fieldName.toString(), term);
// Get the terms from the global index
// Remove the begin and end ' marks
String termValue = null;
- if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'"))
+ if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'")) {
termValue = ((String) term.getValue()).substring(1, ((String) term.getValue()).length() - 1);
- else
+ } else {
termValue = (String) term.getValue();
+ }
// Get the values found in the index for this query term
TermRange ranges = null;
for (MapKey key : this.originalQueryValues.get(termValue)) {
if (key.getFieldName().equalsIgnoreCase(fieldName.toString())) {
ranges = this.globalIndexResults.get(key);
- if (log.isDebugEnabled())
+ if (log.isDebugEnabled()) {
log.debug("Results for cached index ranges for key: " + key + " are " + ranges);
+ }
}
}
// If no result for this field name and value, then add empty range
- if (null == ranges)
- ranges = new TermRange(fieldName.toString(), (String) term.getValue());
+ if (null == ranges) {
+ ranges = new TermRange(fieldName.toString(), term.getValue());
+ }
if (null != data && data instanceof EvaluationContext) {
EvaluationContext ctx = (EvaluationContext) data;
ctx.lastRange = ranges;
@@ -958,7 +1040,7 @@ public class RangeCalculator extends QueryParser {
}
return null;
}
-
+
@Override
public Object visit(ASTLENode node, Object data) {
StringBuilder fieldName = new StringBuilder();
@@ -967,38 +1049,44 @@ public class RangeCalculator extends QueryParser {
Object left = node.jjtGetChild(0).jjtAccept(this, data);
Object right = node.jjtGetChild(1).jjtAccept(this, data);
// Ignore functions in the query
- if (left instanceof FunctionResult || right instanceof FunctionResult)
+ if (left instanceof FunctionResult || right instanceof FunctionResult) {
return null;
+ }
decodeResults(left, right, fieldName, value);
// We need to check to see if we are in a NOT context. If so,
// then we need to reverse the negation.
boolean negated = false;
if (null != data && data instanceof EvaluationContext) {
EvaluationContext ctx = (EvaluationContext) data;
- if (ctx.inNotContext)
+ if (ctx.inNotContext) {
negated = !negated;
+ }
}
- QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()), value.getObject());
+ QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()),
+ value.getObject());
termsCopy.put(fieldName.toString(), term);
// Get the terms from the global index
// Remove the begin and end ' marks
String termValue = null;
- if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'"))
+ if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'")) {
termValue = ((String) term.getValue()).substring(1, ((String) term.getValue()).length() - 1);
- else
+ } else {
termValue = (String) term.getValue();
+ }
// Get the values found in the index for this query term
TermRange ranges = null;
for (MapKey key : this.originalQueryValues.get(termValue)) {
if (key.getFieldName().equalsIgnoreCase(fieldName.toString())) {
ranges = this.globalIndexResults.get(key);
- if (log.isDebugEnabled())
+ if (log.isDebugEnabled()) {
log.debug("Results for cached index ranges for key: " + key + " are " + ranges);
+ }
}
}
// If no result for this field name and value, then add empty range
- if (null == ranges)
- ranges = new TermRange(fieldName.toString(), (String) term.getValue());
+ if (null == ranges) {
+ ranges = new TermRange(fieldName.toString(), term.getValue());
+ }
if (null != data && data instanceof EvaluationContext) {
EvaluationContext ctx = (EvaluationContext) data;
ctx.lastRange = ranges;
@@ -1006,7 +1094,7 @@ public class RangeCalculator extends QueryParser {
}
return null;
}
-
+
@Override
public Object visit(ASTGENode node, Object data) {
StringBuilder fieldName = new StringBuilder();
@@ -1015,38 +1103,44 @@ public class RangeCalculator extends QueryParser {
Object left = node.jjtGetChild(0).jjtAccept(this, data);
Object right = node.jjtGetChild(1).jjtAccept(this, data);
// Ignore functions in the query
- if (left instanceof FunctionResult || right instanceof FunctionResult)
+ if (left instanceof FunctionResult || right instanceof FunctionResult) {
return null;
+ }
decodeResults(left, right, fieldName, value);
// We need to check to see if we are in a NOT context. If so,
// then we need to reverse the negation.
boolean negated = false;
if (null != data && data instanceof EvaluationContext) {
EvaluationContext ctx = (EvaluationContext) data;
- if (ctx.inNotContext)
+ if (ctx.inNotContext) {
negated = !negated;
+ }
}
- QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()), value.getObject());
+ QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()),
+ value.getObject());
termsCopy.put(fieldName.toString(), term);
// Get the terms from the global index
// Remove the begin and end ' marks
String termValue = null;
- if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'"))
+ if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'")) {
termValue = ((String) term.getValue()).substring(1, ((String) term.getValue()).length() - 1);
- else
+ } else {
termValue = (String) term.getValue();
+ }
// Get the values found in the index for this query term
TermRange ranges = null;
for (MapKey key : this.originalQueryValues.get(termValue)) {
if (key.getFieldName().equalsIgnoreCase(fieldName.toString())) {
ranges = this.globalIndexResults.get(key);
- if (log.isDebugEnabled())
+ if (log.isDebugEnabled()) {
log.debug("Results for cached index ranges for key: " + key + " are " + ranges);
+ }
}
}
// If no result for this field name and value, then add empty range
- if (null == ranges)
- ranges = new TermRange(fieldName.toString(), (String) term.getValue());
+ if (null == ranges) {
+ ranges = new TermRange(fieldName.toString(), term.getValue());
+ }
if (null != data && data instanceof EvaluationContext) {
EvaluationContext ctx = (EvaluationContext) data;
ctx.lastRange = ranges;
@@ -1054,7 +1148,7 @@ public class RangeCalculator extends QueryParser {
}
return null;
}
-
+
@Override
public Object visit(ASTERNode node, Object data) {
StringBuilder fieldName = new StringBuilder();
@@ -1063,47 +1157,53 @@ public class RangeCalculator extends QueryParser {
Object left = node.jjtGetChild(0).jjtAccept(this, data);
Object right = node.jjtGetChild(1).jjtAccept(this, data);
// Ignore functions in the query
- if (left instanceof FunctionResult || right instanceof FunctionResult)
+ if (left instanceof FunctionResult || right instanceof FunctionResult) {
return null;
+ }
decodeResults(left, right, fieldName, value);
// We need to check to see if we are in a NOT context. If so,
// then we need to reverse the negation.
boolean negated = false;
if (null != data && data instanceof EvaluationContext) {
EvaluationContext ctx = (EvaluationContext) data;
- if (ctx.inNotContext)
+ if (ctx.inNotContext) {
negated = !negated;
+ }
}
- QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()), value.getObject());
+ QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()),
+ value.getObject());
termsCopy.put(fieldName.toString(), term);
// Get the terms from the global index
// Remove the begin and end ' marks
String termValue = null;
- if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'"))
+ if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'")) {
termValue = ((String) term.getValue()).substring(1, ((String) term.getValue()).length() - 1);
- else
+ } else {
termValue = (String) term.getValue();
+ }
// Get the values found in the index for this query term
TermRange ranges = null;
for (MapKey key : this.originalQueryValues.get(termValue)) {
if (key.getFieldName().equalsIgnoreCase(fieldName.toString())) {
ranges = this.globalIndexResults.get(key);
- if (log.isDebugEnabled())
+ if (log.isDebugEnabled()) {
log.debug("Results for cached index ranges for key: " + key + " are " + ranges);
+ }
}
}
// If no result for this field name and value, then add empty range
- if (null == ranges)
- ranges = new TermRange(fieldName.toString(), (String) term.getValue());
+ if (null == ranges) {
+ ranges = new TermRange(fieldName.toString(), term.getValue());
+ }
if (null != data && data instanceof EvaluationContext) {
EvaluationContext ctx = (EvaluationContext) data;
ctx.lastRange = ranges;
ctx.lastProcessedTerm = fieldName.toString();
}
-
+
return null;
}
-
... 1067 lines suppressed ...