You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by js...@apache.org on 2016/09/23 19:03:15 UTC

nifi git commit: NIFI-2787 truncate flowfile attributes that get indexed to fit within Lucene limits

Repository: nifi
Updated Branches:
  refs/heads/0.x a952dc96a -> 5beaf8c5a


NIFI-2787 truncate flowfile attributes that get indexed to fit within Lucene limits

* jskora adjusted for minor 0.x vs 1.x differences in TestPersistentProvenanceRepository class.

Signed-off-by: Joe Skora <js...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/nifi/repo
Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/5beaf8c5
Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/5beaf8c5
Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/5beaf8c5

Branch: refs/heads/0.x
Commit: 5beaf8c5ad99202c2e42eca159a1abb082b39899
Parents: a952dc9
Author: Mike Moser <mo...@apache.org>
Authored: Wed Sep 21 16:10:49 2016 -0400
Committer: Joe Skora <js...@apache.org>
Committed: Fri Sep 23 15:02:06 2016 -0400

----------------------------------------------------------------------
 .../nifi/provenance/lucene/IndexingAction.java  |  2 +-
 .../nifi/provenance/lucene/LuceneUtil.java      | 40 +++++++++++++++++
 .../TestPersistentProvenanceRepository.java     | 45 ++++++++++++++++++++
 3 files changed, 86 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nifi/blob/5beaf8c5/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/IndexingAction.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/IndexingAction.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/IndexingAction.java
index 46be391..9182151 100644
--- a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/IndexingAction.java
+++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/IndexingAction.java
@@ -74,7 +74,7 @@ public class IndexingAction {
         }
 
         for (final SearchableField searchableField : attributeSearchableFields) {
-            addField(doc, searchableField, attributes.get(searchableField.getSearchableFieldName()), Store.NO);
+            addField(doc, searchableField, LuceneUtil.truncateIndexField(attributes.get(searchableField.getSearchableFieldName())), Store.NO);
         }
 
         final String storageFilename = LuceneUtil.substringBefore(record.getStorageFilename(), ".");

http://git-wip-us.apache.org/repos/asf/nifi/blob/5beaf8c5/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/LuceneUtil.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/LuceneUtil.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/LuceneUtil.java
index 08a99d6..56e871f 100644
--- a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/LuceneUtil.java
+++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/LuceneUtil.java
@@ -17,6 +17,12 @@
 package org.apache.nifi.provenance.lucene;
 
 import java.io.File;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
 import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -30,6 +36,7 @@ import org.apache.nifi.processor.DataUnit;
 import org.apache.nifi.provenance.SearchableFields;
 import org.apache.nifi.provenance.search.SearchTerm;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
@@ -194,4 +201,37 @@ public class LuceneUtil {
         }
         return documentGroups;
     }
+
+    /**
+     * Truncate a single field so that it does not exceed Lucene's limit on indexed terms, which
+     * is {@link IndexWriter#MAX_TERM_LENGTH} bytes of the term's UTF-8 encoding.
+     *
+     * @param field the string to be indexed, may be null
+     * @return the field itself if it is null or already fits, otherwise a leading portion of it that fits;
+     *         null only if the chopped bytes unexpectedly cannot be decoded
+     */
+    public static String truncateIndexField(String field) {
+        if (field == null) {
+            return field;
+        }
+
+        // Lucene counts a term's UTF-8 bytes, so measure in UTF-8 rather than the platform default charset
+        final Charset charset = Charset.forName("UTF-8");
+        final byte[] bytes = field.getBytes(charset);
+        if (bytes.length <= IndexWriter.MAX_TERM_LENGTH) {
+            return field;
+        }
+
+        // chop the field to maximum allowed byte length; this may split the final multi-byte character
+        final ByteBuffer bbuf = ByteBuffer.wrap(bytes, 0, IndexWriter.MAX_TERM_LENGTH);
+
+        try {
+            // IGNORE drops a partial trailing character instead of throwing on the malformed input
+            final CharsetDecoder decoder = charset.newDecoder().onMalformedInput(CodingErrorAction.IGNORE);
+            final CharBuffer cbuf = decoder.decode(bbuf);
+            return cbuf.toString();
+        } catch (final CharacterCodingException shouldNotHappen) {
+            // not expected for bytes produced by getBytes(); keep the documented null result
+            return null;
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/nifi/blob/5beaf8c5/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestPersistentProvenanceRepository.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestPersistentProvenanceRepository.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestPersistentProvenanceRepository.java
index 3238d97..f59c4da 100644
--- a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestPersistentProvenanceRepository.java
+++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestPersistentProvenanceRepository.java
@@ -339,6 +339,51 @@ public class TestPersistentProvenanceRepository {
     }
 
     @Test
+    public void testIndexOnRolloverWithImmenseAttribute() throws IOException {
+        final RepositoryConfiguration repoConfig = createConfiguration();
+        repoConfig.setMaxEventFileLife(500, TimeUnit.MILLISECONDS);
+        repoConfig.setSearchableFields(new ArrayList<>(SearchableFields.getStandardFields()));
+        repoConfig.setSearchableAttributes(SearchableFieldParser.extractSearchableFields("immense", false));
+        repo = new PersistentProvenanceRepository(repoConfig, DEFAULT_ROLLOVER_MILLIS);
+        repo.initialize(getEventReporter());
+
+        final int immenseLength = 33000; // must be greater than 32766 for a meaningful test
+        final StringBuilder immenseValue = new StringBuilder(immenseLength);
+        while (immenseValue.length() < immenseLength) {
+            immenseValue.append('0');
+        }
+        final String baseUuid = "00000000-0000-0000-0000-000000000000";
+        final Map<String, String> attributes = new HashMap<>();
+        attributes.put("abc", "xyz");
+        attributes.put("xyz", "abc");
+        attributes.put("filename", "file-" + baseUuid);
+        attributes.put("immense", immenseValue.toString());
+
+        final ProvenanceEventBuilder eventBuilder = new StandardProvenanceEventRecord.Builder();
+        eventBuilder.setEventTime(System.currentTimeMillis());
+        eventBuilder.setEventType(ProvenanceEventType.RECEIVE);
+        eventBuilder.setTransitUri("nifi://unit-test");
+        eventBuilder.fromFlowFile(createFlowFile(3L, 3000L, attributes));
+        eventBuilder.setComponentId("1234");
+        eventBuilder.setComponentType("dummy processor");
+
+        for (int eventIndex = 0; eventIndex < 10; eventIndex++) {
+            attributes.put("uuid", "00000000-0000-0000-0000-00000000000" + eventIndex);
+            eventBuilder.fromFlowFile(createFlowFile(eventIndex, 3000L, attributes));
+            repo.registerEvent(eventBuilder.build());
+        }
+
+        repo.waitForRollover();
+
+        // all ten registered events carry the immense attribute, so a prefix wildcard should match each of them
+        final Query immenseQuery = new Query(UUID.randomUUID().toString());
+        immenseQuery.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.newSearchableAttribute("immense"), "000*"));
+        immenseQuery.setMaxResults(100);
+
+        assertEquals(10, repo.queryEvents(immenseQuery).getMatchingEvents().size());
+    }
+
+    @Test
     public void testIndexOnRolloverAndSubsequentSearch() throws IOException, InterruptedException, ParseException {
         final RepositoryConfiguration config = createConfiguration();
         config.setMaxEventFileLife(500, TimeUnit.MILLISECONDS);