You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@nifi.apache.org by arpadboda <gi...@git.apache.org> on 2018/11/20 12:11:07 UTC

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

GitHub user arpadboda opened a pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445

    MINIFICPP-681 - Add content hash processor

    Thank you for submitting a contribution to Apache NiFi - MiNiFi C++.
    
    In order to streamline the review of the contribution we ask you
    to ensure the following steps have been taken:
    
    ### For all changes:
    - [ ] Is there a JIRA ticket associated with this PR? Is it referenced
         in the commit message?
    
    - [ ] Does your PR title start with MINIFICPP-XXXX where XXXX is the JIRA number you are trying to resolve? Pay particular attention to the hyphen "-" character.
    
    - [ ] Has your PR been rebased against the latest commit within the target branch (typically master)?
    
    - [ ] Is your initial contribution a single, squashed commit?
    
    ### For code changes:
    - [ ] If adding new dependencies to the code, are these dependencies licensed in a way that is compatible for inclusion under [ASF 2.0](http://www.apache.org/legal/resolved.html#category-a)?
    - [ ] If applicable, have you updated the LICENSE file?
    - [ ] If applicable, have you updated the NOTICE file?
    
    ### For documentation related changes:
    - [ ] Have you ensured that format looks appropriate for the output in which it is rendered?
    
    ### Note:
    Please ensure that once the PR is submitted, you check travis-ci for build issues and submit an update to your PR as soon as possible.


You can merge this pull request into a Git repository by running:

    $ git pull https://github.com/arpadboda/nifi-minifi-cpp MINIFICPP-681

Alternatively you can review and apply these changes as the patch at:

    https://github.com/apache/nifi-minifi-cpp/pull/445.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

    This closes #445
    
----
commit 3e1d31bc9f379bf46f29278ed5bd395d337771c5
Author: Arpad Boda <ab...@...>
Date:   2018-11-19T12:49:39Z

    MINIFICPP-681 - Add content hash processor

----


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by arpadboda <gi...@git.apache.org>.
Github user arpadboda commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235707060
  
    --- Diff: libminifi/include/processors/ContentHash.h ---
    @@ -0,0 +1,186 @@
    +/**
    + * @file ContentHash.h
    + * ContentHash class declaration
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +#ifndef NIFI_MINIFI_CPP_CONTENTHASH_H
    +#define NIFI_MINIFI_CPP_CONTENTHASH_H
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <iomanip>
    +#include <map>
    +#include <memory>
    +#include <string>
    +#include <sstream>
    +#include <utility>
    +#include <stdint.h>
    +
    +#include <openssl/md5.h>
    +#include <openssl/sha.h>
    +
    +#include "FlowFileRecord.h"
    +#include "core/Processor.h"
    +#include "core/ProcessSession.h"
    +#include "core/Resource.h"
    +#include "io/BaseStream.h"
    +
    +using HashReturnType = std::pair<std::string, int64_t>;
    +
    +namespace {
    +#define HASH_BUFFER_SIZE 16384
    +
    +  std::string digestToString(const unsigned char * const digest, size_t size) {
    +    std::stringstream ss;
    +    for(int i = 0; i < size; i++)
    +    {
    +      ss << std::uppercase << std::hex << std::setw(2) << std::setfill('0') << (int)digest[i];
    +    }
    +    return ss.str();
    +  }
    +
    +  HashReturnType MD5Hash(std::shared_ptr<org::apache::nifi::minifi::io::BaseStream> stream) {
    +    HashReturnType ret_val;
    +    ret_val.second = 0;
    +    uint8_t buffer[HASH_BUFFER_SIZE];
    +    MD5_CTX context;
    +    MD5_Init(&context);
    +
    +    size_t ret = 0;
    +    do {
    +      ret = stream->readData(buffer, HASH_BUFFER_SIZE);
    +      if(ret > 0) {
    +        MD5_Update(&context, buffer, ret);
    +        ret_val.second += ret;
    +      }
    +    } while(ret > 0);
    +    unsigned char digest[MD5_DIGEST_LENGTH];
    +    MD5_Final(digest, &context);
    +
    +    ret_val.first = digestToString(digest, MD5_DIGEST_LENGTH);
    +    return ret_val;
    +  }
    +
    +  HashReturnType SHA1Hash(std::shared_ptr<org::apache::nifi::minifi::io::BaseStream> stream) {
    +    HashReturnType ret_val;
    +    ret_val.second = 0;
    +    uint8_t buffer[HASH_BUFFER_SIZE];
    +    SHA_CTX context;
    +    SHA1_Init(&context);
    +
    +    size_t ret = 0;
    +    do {
    +      ret = stream->readData(buffer, HASH_BUFFER_SIZE);
    +      if(ret > 0) {
    +        SHA1_Update(&context, buffer, ret);
    +        ret_val.second += ret;
    +      }
    +    } while(ret > 0);
    +    unsigned char digest[SHA_DIGEST_LENGTH];
    +    SHA1_Final(digest, &context);
    +
    +    ret_val.first = digestToString(digest, SHA_DIGEST_LENGTH);
    +    return ret_val;
    +  }
    +
    --- End diff --
    
    Honestly, I wanted to do that but failed, since the digest, the init and the final calls, and moreover the digest length constant, all differ.
    The header file is pure C; some template magic could easily help to reduce code duplication here, but I don't think we should do that and add it here. Maybe in a util file, but definitely not in a processor.


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by arpadboda <gi...@git.apache.org>.
Github user arpadboda commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r237412934
  
    --- Diff: libminifi/src/processors/HashContent.cpp ---
    @@ -0,0 +1,100 @@
    +/**
    + * @file HashContent.cpp
    + * HashContent class implementation
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <algorithm>
    +#include <iostream>
    +#include <memory>
    +#include <string>
    +#include "processors/HashContent.h"
    +#include "core/ProcessContext.h"
    +#include "core/ProcessSession.h"
    +#include "core/FlowFile.h"
    +
    +namespace org {
    +namespace apache {
    +namespace nifi {
    +namespace minifi {
    +namespace processors {
    +
    +core::Property HashContent::HashAttribute("Hash Attribute", "Attribute to store checksum to", "Checksum");
    +core::Property HashContent::HashAlgorithm("Hash Algorithm", "Name of the algorithm used to generate checksum", "MD5");
    +core::Relationship HashContent::Success("success", "success operational on the flow record");
    +
    +void HashContent::initialize() {
    +  //! Set the supported properties
    +  std::set<core::Property> properties;
    +  properties.insert(HashAttribute);
    +  properties.insert(HashAlgorithm);
    +  setSupportedProperties(properties);
    +  //! Set the supported relationships
    +  std::set<core::Relationship> relationships;
    +  relationships.insert(Success);
    +  setSupportedRelationships(relationships);
    +}
    +
    +void HashContent::onSchedule(core::ProcessContext *context, core::ProcessSessionFactory *sessionFactory) {
    +  std::string value;
    +
    +  attrKey_ = (context->getProperty(HashAttribute.getName(), value)) ? value : "Checksum";
    +  algoName_ = (context->getProperty(HashAlgorithm.getName(), value)) ? value : "MD5";
    --- End diff --
    
    Sure, changed. 


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by phrocker <gi...@git.apache.org>.
Github user phrocker commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235461430
  
    --- Diff: libminifi/src/processors/ContentHash.cpp ---
    @@ -0,0 +1,100 @@
    +/**
    + * @file ContentHash.cpp
    + * ContentHash class implementation
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <algorithm>
    +#include <iostream>
    +#include <memory>
    +#include <string>
    +#include "processors/ContentHash.h"
    +#include "core/ProcessContext.h"
    +#include "core/ProcessSession.h"
    +#include "core/FlowFile.h"
    +
    +namespace org {
    +namespace apache {
    +namespace nifi {
    +namespace minifi {
    +namespace processors {
    +
    +core::Property ContentHash::HashAttribute("Hash Attribute", "Attribute to store checksum to", "Checksum");
    +core::Property ContentHash::HashAlgorithm("Hash Algorithm", "Name of the algorithm used to generate checksum", "MD5");
    +core::Relationship ContentHash::Success("success", "success operational on the flow record");
    +
    +void ContentHash::initialize() {
    +  //! Set the supported properties
    +  std::set<core::Property> properties;
    +  properties.insert(HashAttribute);
    +  properties.insert(HashAlgorithm);
    +  setSupportedProperties(properties);
    +  //! Set the supported relationships
    +  std::set<core::Relationship> relationships;
    +  relationships.insert(Success);
    +  setSupportedRelationships(relationships);
    +}
    +
    +void ContentHash::onTrigger(core::ProcessContext *context, core::ProcessSession *session) {
    +  std::shared_ptr<core::FlowFile> flowFile = session->get();
    +
    +  if (!flowFile) {
    +    return;
    +  }
    +
    +  ReadCallback cb(flowFile, context);
    +  session->read(flowFile, &cb);
    +  session->transfer(flowFile, Success);
    +}
    +
    +int64_t ContentHash::ReadCallback::process(std::shared_ptr<io::BaseStream> stream) {
    +  std::string attrKey, algoName;
    +  ctx_->getProperty(HashAttribute.getName(), attrKey);
    --- End diff --
    
    Unless these are going to be dynamic they should be done in onSchedule. 


---

[GitHub] nifi-minifi-cpp issue #445: MINIFICPP-681 - Add content hash processor

Posted by phrocker <gi...@git.apache.org>.
Github user phrocker commented on the issue:

    https://github.com/apache/nifi-minifi-cpp/pull/445
  
    taking a look. 


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by phrocker <gi...@git.apache.org>.
Github user phrocker commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r237057613
  
    --- Diff: libminifi/src/processors/HashContent.cpp ---
    @@ -0,0 +1,100 @@
    +/**
    + * @file HashContent.cpp
    + * HashContent class implementation
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <algorithm>
    +#include <iostream>
    +#include <memory>
    +#include <string>
    +#include "processors/HashContent.h"
    +#include "core/ProcessContext.h"
    +#include "core/ProcessSession.h"
    +#include "core/FlowFile.h"
    +
    +namespace org {
    +namespace apache {
    +namespace nifi {
    +namespace minifi {
    +namespace processors {
    +
    +core::Property HashContent::HashAttribute("Hash Attribute", "Attribute to store checksum to", "Checksum");
    +core::Property HashContent::HashAlgorithm("Hash Algorithm", "Name of the algorithm used to generate checksum", "MD5");
    +core::Relationship HashContent::Success("success", "success operational on the flow record");
    +
    +void HashContent::initialize() {
    +  //! Set the supported properties
    +  std::set<core::Property> properties;
    +  properties.insert(HashAttribute);
    +  properties.insert(HashAlgorithm);
    +  setSupportedProperties(properties);
    +  //! Set the supported relationships
    +  std::set<core::Relationship> relationships;
    +  relationships.insert(Success);
    +  setSupportedRelationships(relationships);
    +}
    +
    +void HashContent::onSchedule(core::ProcessContext *context, core::ProcessSessionFactory *sessionFactory) {
    +  std::string value;
    +
    +  attrKey_ = (context->getProperty(HashAttribute.getName(), value)) ? value : "Checksum";
    +  algoName_ = (context->getProperty(HashAlgorithm.getName(), value)) ? value : "MD5";
    --- End diff --
    
    Default should probably be sha-256. I believe NiFi has transitioned to this. 


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by arpadboda <gi...@git.apache.org>.
Github user arpadboda commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235709568
  
    --- Diff: libminifi/src/processors/ContentHash.cpp ---
    @@ -0,0 +1,100 @@
    +/**
    + * @file ContentHash.cpp
    + * ContentHash class implementation
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <algorithm>
    +#include <iostream>
    +#include <memory>
    +#include <string>
    +#include "processors/ContentHash.h"
    +#include "core/ProcessContext.h"
    +#include "core/ProcessSession.h"
    +#include "core/FlowFile.h"
    +
    +namespace org {
    +namespace apache {
    +namespace nifi {
    +namespace minifi {
    +namespace processors {
    +
    +core::Property ContentHash::HashAttribute("Hash Attribute", "Attribute to store checksum to", "Checksum");
    +core::Property ContentHash::HashAlgorithm("Hash Algorithm", "Name of the algorithm used to generate checksum", "MD5");
    +core::Relationship ContentHash::Success("success", "success operational on the flow record");
    +
    +void ContentHash::initialize() {
    +  //! Set the supported properties
    +  std::set<core::Property> properties;
    +  properties.insert(HashAttribute);
    +  properties.insert(HashAlgorithm);
    +  setSupportedProperties(properties);
    +  //! Set the supported relationships
    +  std::set<core::Relationship> relationships;
    +  relationships.insert(Success);
    +  setSupportedRelationships(relationships);
    +}
    +
    +void ContentHash::onTrigger(core::ProcessContext *context, core::ProcessSession *session) {
    +  std::shared_ptr<core::FlowFile> flowFile = session->get();
    +
    +  if (!flowFile) {
    +    return;
    +  }
    +
    +  ReadCallback cb(flowFile, context);
    +  session->read(flowFile, &cb);
    +  session->transfer(flowFile, Success);
    +}
    +
    +int64_t ContentHash::ReadCallback::process(std::shared_ptr<io::BaseStream> stream) {
    +  std::string attrKey, algoName;
    +  ctx_->getProperty(HashAttribute.getName(), attrKey);
    +  ctx_->getProperty(HashAlgorithm.getName(), algoName);
    +  std::transform(algoName.begin(), algoName.end(), algoName.begin(), ::toupper);
    +
    +  // Erase '-' to make sha-256 and sha-2 work, too
    +  algoName.erase(std::remove(algoName.begin(), algoName.end(), '-'), algoName.end());
    +
    +  // This throws in case algo is not found, but that's fine
    --- End diff --
    
    This is definitely an interesting point.
    I would consider the failure relationship as being used mostly for flowfile-specific issues. For example, the processor expects some data to be present in the attributes/content of the flowfile, and this criterion isn't met.
    This case, however, is about the processor being misconfigured, which in my view is a bit different. In an ideal world this shouldn't even happen, as setting the property should fail if the provided value is not one of the allowed ones.


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by phrocker <gi...@git.apache.org>.
Github user phrocker commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r237057728
  
    --- Diff: libminifi/src/processors/HashContent.cpp ---
    @@ -0,0 +1,100 @@
    +/**
    + * @file HashContent.cpp
    + * HashContent class implementation
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <algorithm>
    +#include <iostream>
    +#include <memory>
    +#include <string>
    +#include "processors/HashContent.h"
    +#include "core/ProcessContext.h"
    +#include "core/ProcessSession.h"
    +#include "core/FlowFile.h"
    +
    +namespace org {
    +namespace apache {
    +namespace nifi {
    +namespace minifi {
    +namespace processors {
    +
    +core::Property HashContent::HashAttribute("Hash Attribute", "Attribute to store checksum to", "Checksum");
    +core::Property HashContent::HashAlgorithm("Hash Algorithm", "Name of the algorithm used to generate checksum", "MD5");
    +core::Relationship HashContent::Success("success", "success operational on the flow record");
    +
    +void HashContent::initialize() {
    +  //! Set the supported properties
    +  std::set<core::Property> properties;
    +  properties.insert(HashAttribute);
    +  properties.insert(HashAlgorithm);
    +  setSupportedProperties(properties);
    +  //! Set the supported relationships
    +  std::set<core::Relationship> relationships;
    +  relationships.insert(Success);
    +  setSupportedRelationships(relationships);
    +}
    +
    +void HashContent::onSchedule(core::ProcessContext *context, core::ProcessSessionFactory *sessionFactory) {
    +  std::string value;
    +
    +  attrKey_ = (context->getProperty(HashAttribute.getName(), value)) ? value : "Checksum";
    +  algoName_ = (context->getProperty(HashAlgorithm.getName(), value)) ? value : "MD5";
    --- End diff --
    
    I'm not sure that's "low power friendly," but they can deal with that through configuration. 


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by phrocker <gi...@git.apache.org>.
Github user phrocker commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235477945
  
    --- Diff: libminifi/include/processors/ContentHash.h ---
    @@ -0,0 +1,186 @@
    +/**
    + * @file ContentHash.h
    + * ContentHash class declaration
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +#ifndef NIFI_MINIFI_CPP_CONTENTHASH_H
    +#define NIFI_MINIFI_CPP_CONTENTHASH_H
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <iomanip>
    +#include <map>
    +#include <memory>
    +#include <string>
    +#include <sstream>
    +#include <utility>
    +#include <stdint.h>
    +
    +#include <openssl/md5.h>
    +#include <openssl/sha.h>
    +
    +#include "FlowFileRecord.h"
    +#include "core/Processor.h"
    +#include "core/ProcessSession.h"
    +#include "core/Resource.h"
    +#include "io/BaseStream.h"
    +
    +using HashReturnType = std::pair<std::string, int64_t>;
    +
    +namespace {
    +#define HASH_BUFFER_SIZE 16384
    +
    +  std::string digestToString(const unsigned char * const digest, size_t size) {
    +    std::stringstream ss;
    +    for(int i = 0; i < size; i++)
    +    {
    +      ss << std::uppercase << std::hex << std::setw(2) << std::setfill('0') << (int)digest[i];
    +    }
    +    return ss.str();
    +  }
    +
    +  HashReturnType MD5Hash(std::shared_ptr<org::apache::nifi::minifi::io::BaseStream> stream) {
    +    HashReturnType ret_val;
    +    ret_val.second = 0;
    +    uint8_t buffer[HASH_BUFFER_SIZE];
    +    MD5_CTX context;
    +    MD5_Init(&context);
    +
    +    size_t ret = 0;
    +    do {
    +      ret = stream->readData(buffer, HASH_BUFFER_SIZE);
    +      if(ret > 0) {
    +        MD5_Update(&context, buffer, ret);
    +        ret_val.second += ret;
    --- End diff --
    
    As mentioned below, the HashReturnType seems like it might not be necessary. If ret < 0 on any given read, you exit the conditional and the loop, and then proceed to call finalize on the hash functions with that partially written context. The code then supplies a digest that is potentially incorrect. Alternatively, you can simply short-circuit and return an empty string on any stream error, and be guaranteed that the resulting hash is an error case.


---

[GitHub] nifi-minifi-cpp issue #445: MINIFICPP-681 - Add content hash processor

Posted by alopresto <gi...@git.apache.org>.
Github user alopresto commented on the issue:

    https://github.com/apache/nifi-minifi-cpp/pull/445
  
    You can also look at [`CryptographicHashContent`](https://github.com/apache/nifi/blob/master/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/CryptographicHashContent.java) and [`HashService`](https://github.com/apache/nifi/blob/master/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/security/util/crypto/HashService.java) in NiFi to see how these actions are currently handled. 


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by phrocker <gi...@git.apache.org>.
Github user phrocker commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r237056704
  
    --- Diff: libminifi/include/processors/HashContent.h ---
    @@ -0,0 +1,196 @@
    +/**
    + * @file HashContent.h
    + * HashContent class declaration
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +#ifndef NIFI_MINIFI_CPP_HashContent_H
    +#define NIFI_MINIFI_CPP_HashContent_H
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <iomanip>
    +#include <map>
    +#include <memory>
    +#include <string>
    +#include <sstream>
    +#include <utility>
    +#include <stdint.h>
    +
    +#include <openssl/md5.h>
    +#include <openssl/sha.h>
    +
    +#include "FlowFileRecord.h"
    +#include "core/Processor.h"
    +#include "core/ProcessSession.h"
    +#include "core/Resource.h"
    +#include "io/BaseStream.h"
    +
    +using HashReturnType = std::pair<std::string, int64_t>;
    +
    +namespace {
    +#define HASH_BUFFER_SIZE 16384
    +
    +  std::string digestToString(const unsigned char * const digest, size_t size) {
    +    std::stringstream ss;
    +    for(int i = 0; i < size; i++)
    +    {
    +      ss << std::uppercase << std::hex << std::setw(2) << std::setfill('0') << (int)digest[i];
    +    }
    +    return ss.str();
    +  }
    +
    +  HashReturnType MD5Hash(const std::shared_ptr<org::apache::nifi::minifi::io::BaseStream>& stream) {
    +    HashReturnType ret_val;
    +    ret_val.second = 0;
    +    uint8_t buffer[HASH_BUFFER_SIZE];
    +    MD5_CTX context;
    +    MD5_Init(&context);
    +
    +    size_t ret = 0;
    +    do {
    +      ret = stream->readData(buffer, HASH_BUFFER_SIZE);
    +      if(ret > 0) {
    +        MD5_Update(&context, buffer, ret);
    +        ret_val.second += ret;
    +      }
    +    } while(ret > 0);
    +
    +    if (ret_val.second > 0) {
    +      unsigned char digest[MD5_DIGEST_LENGTH];
    +      MD5_Final(digest, &context);
    +      ret_val.first = digestToString(digest, MD5_DIGEST_LENGTH);
    +    }
    +    return ret_val;
    +  }
    +
    +  HashReturnType SHA1Hash(const std::shared_ptr<org::apache::nifi::minifi::io::BaseStream>& stream) {
    +    HashReturnType ret_val;
    +    ret_val.second = 0;
    +    uint8_t buffer[HASH_BUFFER_SIZE];
    +    SHA_CTX context;
    +    SHA1_Init(&context);
    +
    +    size_t ret = 0;
    +    do {
    +      ret = stream->readData(buffer, HASH_BUFFER_SIZE);
    +      if(ret > 0) {
    +        SHA1_Update(&context, buffer, ret);
    +        ret_val.second += ret;
    +      }
    +    } while(ret > 0);
    +
    +    if (ret_val.second > 0) {
    +      unsigned char digest[SHA_DIGEST_LENGTH];
    +      SHA1_Final(digest, &context);
    +      ret_val.first = digestToString(digest, SHA_DIGEST_LENGTH);
    +    }
    +    return ret_val;
    +  }
    +
    +  HashReturnType SHA256Hash(const std::shared_ptr<org::apache::nifi::minifi::io::BaseStream>& stream) {
    +    HashReturnType ret_val;
    +    ret_val.second = 0;
    +    uint8_t buffer[HASH_BUFFER_SIZE];
    +    SHA256_CTX context;
    +    SHA256_Init(&context);
    +
    +    size_t ret ;
    +    do {
    +      ret = stream->readData(buffer, HASH_BUFFER_SIZE);
    +      if(ret > 0) {
    +        SHA256_Update(&context, buffer, ret);
    +        ret_val.second += ret;
    +      }
    +    } while(ret > 0);
    +
    +    if (ret_val.second > 0) {
    +      unsigned char digest[SHA256_DIGEST_LENGTH];
    +      SHA256_Final(digest, &context);
    +      ret_val.first = digestToString(digest, SHA256_DIGEST_LENGTH);
    +    }
    +    return ret_val;
    +  }
    +}
    +
    +
    +namespace org {
    +namespace apache {
    +namespace nifi {
    +namespace minifi {
    +namespace processors {
    +
    +static const std::map<std::string, const std::function<HashReturnType(const std::shared_ptr<io::BaseStream>&)>> HashAlgos =
    +  { {"MD5",  MD5Hash}, {"SHA1", SHA1Hash}, {"SHA256", SHA256Hash} };
    +
    +//! HashContent Class
    +class HashContent : public core::Processor {
    + public:
    +  //! Constructor
    +  /*!
    +  * Create a new processor
    +  */
    +  explicit HashContent(std::string name,  utils::Identifier uuid = utils::Identifier())
    +  : Processor(name, uuid)
    +  {
    +    logger_ = logging::LoggerFactory<HashContent>::getLogger();
    +  }
    +  //! Processor Name
    +  static constexpr char const* ProcessorName = "HashContent";
    +  //! Supported Properties
    +  static core::Property HashAttribute;
    +  static core::Property HashAlgorithm;
    +  //! Supported Relationships
    +  static core::Relationship Success;
    +
    +  void onSchedule(core::ProcessContext *context, core::ProcessSessionFactory *sessionFactory) override;
    --- End diff --
    
    I'm not sure how I missed this before, but we can't use `override` in libminifi because some of the environments we support ship GCC versions that don't support it yet. It's used in extensions, but we refrain from using it in libminifi until we can determine that those environments have moved to later standards.


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by phrocker <gi...@git.apache.org>.
Github user phrocker commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235472601
  
    --- Diff: libminifi/src/processors/ContentHash.cpp ---
    @@ -0,0 +1,100 @@
    +/**
    + * @file ContentHash.cpp
    + * ContentHash class implementation
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <algorithm>
    +#include <iostream>
    +#include <memory>
    +#include <string>
    +#include "processors/ContentHash.h"
    +#include "core/ProcessContext.h"
    +#include "core/ProcessSession.h"
    +#include "core/FlowFile.h"
    +
    +namespace org {
    +namespace apache {
    +namespace nifi {
    +namespace minifi {
    +namespace processors {
    +
    +core::Property ContentHash::HashAttribute("Hash Attribute", "Attribute to store checksum to", "Checksum");
    +core::Property ContentHash::HashAlgorithm("Hash Algorithm", "Name of the algorithm used to generate checksum", "MD5");
    +core::Relationship ContentHash::Success("success", "success operational on the flow record");
    +
    +void ContentHash::initialize() {
    +  //! Set the supported properties
    +  std::set<core::Property> properties;
    +  properties.insert(HashAttribute);
    +  properties.insert(HashAlgorithm);
    +  setSupportedProperties(properties);
    +  //! Set the supported relationships
    +  std::set<core::Relationship> relationships;
    +  relationships.insert(Success);
    +  setSupportedRelationships(relationships);
    +}
    +
    +void ContentHash::onTrigger(core::ProcessContext *context, core::ProcessSession *session) {
    +  std::shared_ptr<core::FlowFile> flowFile = session->get();
    +
    +  if (!flowFile) {
    +    return;
    +  }
    +
    +  ReadCallback cb(flowFile, context);
    +  session->read(flowFile, &cb);
    +  session->transfer(flowFile, Success);
    +}
    +
    +int64_t ContentHash::ReadCallback::process(std::shared_ptr<io::BaseStream> stream) {
    +  std::string attrKey, algoName;
    +  ctx_->getProperty(HashAttribute.getName(), attrKey);
    +  ctx_->getProperty(HashAlgorithm.getName(), algoName);
    +  std::transform(algoName.begin(), algoName.end(), algoName.begin(), ::toupper);
    +
    +  // Erase '-' to make sha-256 and sha-2 work, too
    +  algoName.erase(std::remove(algoName.begin(), algoName.end(), '-'), algoName.end());
    +
    +  // This throws in case algo is not found, but that's fine
    --- End diff --
    
    Curious about the comment, "This throws in case algo is not found, but that's fine." What do you mean by "that's fine"?
    That would cause a rollback, which may then put back pressure on the flow. This may not be desired. It doesn't allow the user to gracefully deal with the failure relationship. Might there be a way to deal with this such that failure is a condition we can account for in our relationships? 


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by arpadboda <gi...@git.apache.org>.
Github user arpadboda commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r237412844
  
    --- Diff: libminifi/include/processors/HashContent.h ---
    @@ -0,0 +1,196 @@
    +/**
    + * @file HashContent.h
    + * HashContent class declaration
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +#ifndef NIFI_MINIFI_CPP_HashContent_H
    +#define NIFI_MINIFI_CPP_HashContent_H
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <iomanip>
    +#include <map>
    +#include <memory>
    +#include <string>
    +#include <sstream>
    +#include <utility>
    +#include <stdint.h>
    +
    +#include <openssl/md5.h>
    +#include <openssl/sha.h>
    +
    +#include "FlowFileRecord.h"
    +#include "core/Processor.h"
    +#include "core/ProcessSession.h"
    +#include "core/Resource.h"
    +#include "io/BaseStream.h"
    +
    +using HashReturnType = std::pair<std::string, int64_t>;
    +
    +namespace {
    +#define HASH_BUFFER_SIZE 16384
    +
    +  std::string digestToString(const unsigned char * const digest, size_t size) {
    +    std::stringstream ss;
    +    for(int i = 0; i < size; i++)
    +    {
    +      ss << std::uppercase << std::hex << std::setw(2) << std::setfill('0') << (int)digest[i];
    +    }
    +    return ss.str();
    +  }
    +
    +  HashReturnType MD5Hash(const std::shared_ptr<org::apache::nifi::minifi::io::BaseStream>& stream) {
    +    HashReturnType ret_val;
    +    ret_val.second = 0;
    +    uint8_t buffer[HASH_BUFFER_SIZE];
    +    MD5_CTX context;
    +    MD5_Init(&context);
    +
    +    size_t ret = 0;
    +    do {
    +      ret = stream->readData(buffer, HASH_BUFFER_SIZE);
    +      if(ret > 0) {
    +        MD5_Update(&context, buffer, ret);
    +        ret_val.second += ret;
    +      }
    +    } while(ret > 0);
    +
    +    if (ret_val.second > 0) {
    +      unsigned char digest[MD5_DIGEST_LENGTH];
    +      MD5_Final(digest, &context);
    +      ret_val.first = digestToString(digest, MD5_DIGEST_LENGTH);
    +    }
    +    return ret_val;
    +  }
    +
    +  HashReturnType SHA1Hash(const std::shared_ptr<org::apache::nifi::minifi::io::BaseStream>& stream) {
    +    HashReturnType ret_val;
    +    ret_val.second = 0;
    +    uint8_t buffer[HASH_BUFFER_SIZE];
    +    SHA_CTX context;
    +    SHA1_Init(&context);
    +
    +    size_t ret = 0;
    +    do {
    +      ret = stream->readData(buffer, HASH_BUFFER_SIZE);
    +      if(ret > 0) {
    +        SHA1_Update(&context, buffer, ret);
    +        ret_val.second += ret;
    +      }
    +    } while(ret > 0);
    +
    +    if (ret_val.second > 0) {
    +      unsigned char digest[SHA_DIGEST_LENGTH];
    +      SHA1_Final(digest, &context);
    +      ret_val.first = digestToString(digest, SHA_DIGEST_LENGTH);
    +    }
    +    return ret_val;
    +  }
    +
    +  HashReturnType SHA256Hash(const std::shared_ptr<org::apache::nifi::minifi::io::BaseStream>& stream) {
    +    HashReturnType ret_val;
    +    ret_val.second = 0;
    +    uint8_t buffer[HASH_BUFFER_SIZE];
    +    SHA256_CTX context;
    +    SHA256_Init(&context);
    +
    +    size_t ret ;
    +    do {
    +      ret = stream->readData(buffer, HASH_BUFFER_SIZE);
    +      if(ret > 0) {
    +        SHA256_Update(&context, buffer, ret);
    +        ret_val.second += ret;
    +      }
    +    } while(ret > 0);
    +
    +    if (ret_val.second > 0) {
    +      unsigned char digest[SHA256_DIGEST_LENGTH];
    +      SHA256_Final(digest, &context);
    +      ret_val.first = digestToString(digest, SHA256_DIGEST_LENGTH);
    +    }
    +    return ret_val;
    +  }
    +}
    +
    +
    +namespace org {
    +namespace apache {
    +namespace nifi {
    +namespace minifi {
    +namespace processors {
    +
    +static const std::map<std::string, const std::function<HashReturnType(const std::shared_ptr<io::BaseStream>&)>> HashAlgos =
    +  { {"MD5",  MD5Hash}, {"SHA1", SHA1Hash}, {"SHA256", SHA256Hash} };
    +
    +//! HashContent Class
    +class HashContent : public core::Processor {
    + public:
    +  //! Constructor
    +  /*!
    +  * Create a new processor
    +  */
    +  explicit HashContent(std::string name,  utils::Identifier uuid = utils::Identifier())
    +  : Processor(name, uuid)
    +  {
    +    logger_ = logging::LoggerFactory<HashContent>::getLogger();
    +  }
    +  //! Processor Name
    +  static constexpr char const* ProcessorName = "HashContent";
    +  //! Supported Properties
    +  static core::Property HashAttribute;
    +  static core::Property HashAlgorithm;
    +  //! Supported Relationships
    +  static core::Relationship Success;
    +
    +  void onSchedule(core::ProcessContext *context, core::ProcessSessionFactory *sessionFactory) override;
    --- End diff --
    
    Okay, removed them.


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by arpadboda <gi...@git.apache.org>.
Github user arpadboda commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235707338
  
    --- Diff: libminifi/include/processors/ContentHash.h ---
    @@ -0,0 +1,186 @@
    +/**
    + * @file ContentHash.h
    + * ContentHash class declaration
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +#ifndef NIFI_MINIFI_CPP_CONTENTHASH_H
    +#define NIFI_MINIFI_CPP_CONTENTHASH_H
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <iomanip>
    +#include <map>
    +#include <memory>
    +#include <string>
    +#include <sstream>
    +#include <utility>
    +#include <stdint.h>
    +
    +#include <openssl/md5.h>
    +#include <openssl/sha.h>
    +
    +#include "FlowFileRecord.h"
    +#include "core/Processor.h"
    +#include "core/ProcessSession.h"
    +#include "core/Resource.h"
    +#include "io/BaseStream.h"
    +
    +using HashReturnType = std::pair<std::string, int64_t>;
    +
    +namespace {
    +#define HASH_BUFFER_SIZE 16384
    +
    +  std::string digestToString(const unsigned char * const digest, size_t size) {
    +    std::stringstream ss;
    +    for(int i = 0; i < size; i++)
    +    {
    +      ss << std::uppercase << std::hex << std::setw(2) << std::setfill('0') << (int)digest[i];
    +    }
    +    return ss.str();
    +  }
    +
    +  HashReturnType MD5Hash(std::shared_ptr<org::apache::nifi::minifi::io::BaseStream> stream) {
    --- End diff --
    
    Nope, good catch, should be const ref.


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by phrocker <gi...@git.apache.org>.
Github user phrocker commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235461700
  
    --- Diff: libminifi/src/processors/ContentHash.cpp ---
    @@ -0,0 +1,100 @@
    +/**
    + * @file ContentHash.cpp
    + * ContentHash class implementation
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <algorithm>
    +#include <iostream>
    +#include <memory>
    +#include <string>
    +#include "processors/ContentHash.h"
    +#include "core/ProcessContext.h"
    +#include "core/ProcessSession.h"
    +#include "core/FlowFile.h"
    +
    +namespace org {
    +namespace apache {
    +namespace nifi {
    +namespace minifi {
    +namespace processors {
    +
    +core::Property ContentHash::HashAttribute("Hash Attribute", "Attribute to store checksum to", "Checksum");
    +core::Property ContentHash::HashAlgorithm("Hash Algorithm", "Name of the algorithm used to generate checksum", "MD5");
    +core::Relationship ContentHash::Success("success", "success operational on the flow record");
    +
    +void ContentHash::initialize() {
    +  //! Set the supported properties
    +  std::set<core::Property> properties;
    +  properties.insert(HashAttribute);
    +  properties.insert(HashAlgorithm);
    +  setSupportedProperties(properties);
    +  //! Set the supported relationships
    +  std::set<core::Relationship> relationships;
    +  relationships.insert(Success);
    +  setSupportedRelationships(relationships);
    +}
    +
    +void ContentHash::onTrigger(core::ProcessContext *context, core::ProcessSession *session) {
    +  std::shared_ptr<core::FlowFile> flowFile = session->get();
    +
    +  if (!flowFile) {
    +    return;
    +  }
    +
    +  ReadCallback cb(flowFile, context);
    +  session->read(flowFile, &cb);
    +  session->transfer(flowFile, Success);
    +}
    +
    +int64_t ContentHash::ReadCallback::process(std::shared_ptr<io::BaseStream> stream) {
    +  std::string attrKey, algoName;
    +  ctx_->getProperty(HashAttribute.getName(), attrKey);
    --- End diff --
    
    These can be done in onSchedule, since these properties aren't dynamic.


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by phrocker <gi...@git.apache.org>.
Github user phrocker commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235476969
  
    --- Diff: libminifi/src/processors/ContentHash.cpp ---
    @@ -0,0 +1,100 @@
    +/**
    + * @file ContentHash.cpp
    + * ContentHash class implementation
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <algorithm>
    +#include <iostream>
    +#include <memory>
    +#include <string>
    +#include "processors/ContentHash.h"
    +#include "core/ProcessContext.h"
    +#include "core/ProcessSession.h"
    +#include "core/FlowFile.h"
    +
    +namespace org {
    +namespace apache {
    +namespace nifi {
    +namespace minifi {
    +namespace processors {
    +
    +core::Property ContentHash::HashAttribute("Hash Attribute", "Attribute to store checksum to", "Checksum");
    +core::Property ContentHash::HashAlgorithm("Hash Algorithm", "Name of the algorithm used to generate checksum", "MD5");
    +core::Relationship ContentHash::Success("success", "success operational on the flow record");
    +
    +void ContentHash::initialize() {
    +  //! Set the supported properties
    +  std::set<core::Property> properties;
    +  properties.insert(HashAttribute);
    +  properties.insert(HashAlgorithm);
    +  setSupportedProperties(properties);
    +  //! Set the supported relationships
    +  std::set<core::Relationship> relationships;
    +  relationships.insert(Success);
    +  setSupportedRelationships(relationships);
    +}
    +
    +void ContentHash::onTrigger(core::ProcessContext *context, core::ProcessSession *session) {
    +  std::shared_ptr<core::FlowFile> flowFile = session->get();
    +
    +  if (!flowFile) {
    +    return;
    +  }
    +
    +  ReadCallback cb(flowFile, context);
    +  session->read(flowFile, &cb);
    +  session->transfer(flowFile, Success);
    +}
    +
    +int64_t ContentHash::ReadCallback::process(std::shared_ptr<io::BaseStream> stream) {
    +  std::string attrKey, algoName;
    +  ctx_->getProperty(HashAttribute.getName(), attrKey);
    +  ctx_->getProperty(HashAlgorithm.getName(), algoName);
    +  std::transform(algoName.begin(), algoName.end(), algoName.begin(), ::toupper);
    +
    +  // Erase '-' to make sha-256 and sha-2 work, too
    +  algoName.erase(std::remove(algoName.begin(), algoName.end(), '-'), algoName.end());
    +
    +  // This throws in case algo is not found, but that's fine
    +  auto algo = HashAlgos.at(algoName);
    +
    +  const auto& ret_val = algo(stream);
    +
    +  if (ret_val.second <= 0) {
    --- End diff --
    
    Is this necessary? Cryptographic hash functions ensure the result will never be empty. If the stream failed, the digest string would be empty. This would make the return code unnecessary.


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by phrocker <gi...@git.apache.org>.
Github user phrocker commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235471397
  
    --- Diff: libminifi/src/processors/ContentHash.cpp ---
    @@ -0,0 +1,100 @@
    +/**
    + * @file ContentHash.cpp
    + * ContentHash class implementation
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <algorithm>
    +#include <iostream>
    +#include <memory>
    +#include <string>
    +#include "processors/ContentHash.h"
    +#include "core/ProcessContext.h"
    +#include "core/ProcessSession.h"
    +#include "core/FlowFile.h"
    +
    +namespace org {
    +namespace apache {
    +namespace nifi {
    +namespace minifi {
    +namespace processors {
    +
    +core::Property ContentHash::HashAttribute("Hash Attribute", "Attribute to store checksum to", "Checksum");
    +core::Property ContentHash::HashAlgorithm("Hash Algorithm", "Name of the algorithm used to generate checksum", "MD5");
    +core::Relationship ContentHash::Success("success", "success operational on the flow record");
    +
    +void ContentHash::initialize() {
    +  //! Set the supported properties
    +  std::set<core::Property> properties;
    +  properties.insert(HashAttribute);
    +  properties.insert(HashAlgorithm);
    +  setSupportedProperties(properties);
    +  //! Set the supported relationships
    +  std::set<core::Relationship> relationships;
    +  relationships.insert(Success);
    +  setSupportedRelationships(relationships);
    +}
    +
    +void ContentHash::onTrigger(core::ProcessContext *context, core::ProcessSession *session) {
    +  std::shared_ptr<core::FlowFile> flowFile = session->get();
    +
    +  if (!flowFile) {
    +    return;
    +  }
    +
    +  ReadCallback cb(flowFile, context);
    +  session->read(flowFile, &cb);
    +  session->transfer(flowFile, Success);
    +}
    +
    +int64_t ContentHash::ReadCallback::process(std::shared_ptr<io::BaseStream> stream) {
    +  std::string attrKey, algoName;
    +  ctx_->getProperty(HashAttribute.getName(), attrKey);
    +  ctx_->getProperty(HashAlgorithm.getName(), algoName);
    +  std::transform(algoName.begin(), algoName.end(), algoName.begin(), ::toupper);
    +
    +  // Erase '-' to make sha-256 and sha-2 work, too
    +  algoName.erase(std::remove(algoName.begin(), algoName.end(), '-'), algoName.end());
    +
    +  // This throws in case algo is not found, but that's fine
    --- End diff --
    
    Might the code above be less duplicative with a simple if statement here? 


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by arpadboda <gi...@git.apache.org>.
Github user arpadboda commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235734574
  
    --- Diff: libminifi/src/processors/ContentHash.cpp ---
    @@ -0,0 +1,100 @@
    +/**
    + * @file ContentHash.cpp
    + * ContentHash class implementation
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <algorithm>
    +#include <iostream>
    +#include <memory>
    +#include <string>
    +#include "processors/ContentHash.h"
    +#include "core/ProcessContext.h"
    +#include "core/ProcessSession.h"
    +#include "core/FlowFile.h"
    +
    +namespace org {
    +namespace apache {
    +namespace nifi {
    +namespace minifi {
    +namespace processors {
    +
    +core::Property ContentHash::HashAttribute("Hash Attribute", "Attribute to store checksum to", "Checksum");
    +core::Property ContentHash::HashAlgorithm("Hash Algorithm", "Name of the algorithm used to generate checksum", "MD5");
    +core::Relationship ContentHash::Success("success", "success operational on the flow record");
    +
    +void ContentHash::initialize() {
    +  //! Set the supported properties
    +  std::set<core::Property> properties;
    +  properties.insert(HashAttribute);
    +  properties.insert(HashAlgorithm);
    +  setSupportedProperties(properties);
    +  //! Set the supported relationships
    +  std::set<core::Relationship> relationships;
    +  relationships.insert(Success);
    +  setSupportedRelationships(relationships);
    +}
    +
    +void ContentHash::onTrigger(core::ProcessContext *context, core::ProcessSession *session) {
    +  std::shared_ptr<core::FlowFile> flowFile = session->get();
    +
    +  if (!flowFile) {
    +    return;
    +  }
    +
    +  ReadCallback cb(flowFile, context);
    +  session->read(flowFile, &cb);
    +  session->transfer(flowFile, Success);
    +}
    +
    +int64_t ContentHash::ReadCallback::process(std::shared_ptr<io::BaseStream> stream) {
    +  std::string attrKey, algoName;
    +  ctx_->getProperty(HashAttribute.getName(), attrKey);
    --- End diff --
    
    Moved


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by arpadboda <gi...@git.apache.org>.
Github user arpadboda commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235734644
  
    --- Diff: libminifi/src/processors/ContentHash.cpp ---
    @@ -0,0 +1,100 @@
    +/**
    + * @file ContentHash.cpp
    + * ContentHash class implementation
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <algorithm>
    +#include <iostream>
    +#include <memory>
    +#include <string>
    +#include "processors/ContentHash.h"
    +#include "core/ProcessContext.h"
    +#include "core/ProcessSession.h"
    +#include "core/FlowFile.h"
    +
    +namespace org {
    +namespace apache {
    +namespace nifi {
    +namespace minifi {
    +namespace processors {
    +
    +core::Property ContentHash::HashAttribute("Hash Attribute", "Attribute to store checksum to", "Checksum");
    +core::Property ContentHash::HashAlgorithm("Hash Algorithm", "Name of the algorithm used to generate checksum", "MD5");
    +core::Relationship ContentHash::Success("success", "success operational on the flow record");
    +
    +void ContentHash::initialize() {
    +  //! Set the supported properties
    +  std::set<core::Property> properties;
    +  properties.insert(HashAttribute);
    +  properties.insert(HashAlgorithm);
    +  setSupportedProperties(properties);
    +  //! Set the supported relationships
    +  std::set<core::Relationship> relationships;
    +  relationships.insert(Success);
    +  setSupportedRelationships(relationships);
    +}
    +
    +void ContentHash::onTrigger(core::ProcessContext *context, core::ProcessSession *session) {
    +  std::shared_ptr<core::FlowFile> flowFile = session->get();
    +
    +  if (!flowFile) {
    +    return;
    +  }
    +
    +  ReadCallback cb(flowFile, context);
    +  session->read(flowFile, &cb);
    +  session->transfer(flowFile, Success);
    +}
    +
    +int64_t ContentHash::ReadCallback::process(std::shared_ptr<io::BaseStream> stream) {
    +  std::string attrKey, algoName;
    +  ctx_->getProperty(HashAttribute.getName(), attrKey);
    --- End diff --
    
    Moved


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by phrocker <gi...@git.apache.org>.
Github user phrocker commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235459513
  
    --- Diff: libminifi/include/processors/ContentHash.h ---
    @@ -0,0 +1,186 @@
    +/**
    + * @file ContentHash.h
    + * ContentHash class declaration
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +#ifndef NIFI_MINIFI_CPP_CONTENTHASH_H
    +#define NIFI_MINIFI_CPP_CONTENTHASH_H
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <iomanip>
    +#include <map>
    +#include <memory>
    +#include <string>
    +#include <sstream>
    +#include <utility>
    +#include <stdint.h>
    +
    +#include <openssl/md5.h>
    +#include <openssl/sha.h>
    +
    +#include "FlowFileRecord.h"
    +#include "core/Processor.h"
    +#include "core/ProcessSession.h"
    +#include "core/Resource.h"
    +#include "io/BaseStream.h"
    +
    +using HashReturnType = std::pair<std::string, int64_t>;
    +
    +namespace {
    +#define HASH_BUFFER_SIZE 16384
    +
    +  std::string digestToString(const unsigned char * const digest, size_t size) {
    +    std::stringstream ss;
    +    for(int i = 0; i < size; i++)
    +    {
    +      ss << std::uppercase << std::hex << std::setw(2) << std::setfill('0') << (int)digest[i];
    +    }
    +    return ss.str();
    +  }
    +
    +  HashReturnType MD5Hash(std::shared_ptr<org::apache::nifi::minifi::io::BaseStream> stream) {
    --- End diff --
    
    Is the ref count increment intentional?


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by phrocker <gi...@git.apache.org>.
Github user phrocker commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r237058367
  
    --- Diff: libminifi/src/processors/ContentHash.cpp ---
    @@ -0,0 +1,100 @@
    +/**
    + * @file ContentHash.cpp
    + * ContentHash class implementation
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <algorithm>
    +#include <iostream>
    +#include <memory>
    +#include <string>
    +#include "processors/ContentHash.h"
    +#include "core/ProcessContext.h"
    +#include "core/ProcessSession.h"
    +#include "core/FlowFile.h"
    +
    +namespace org {
    +namespace apache {
    +namespace nifi {
    +namespace minifi {
    +namespace processors {
    +
    +core::Property ContentHash::HashAttribute("Hash Attribute", "Attribute to store checksum to", "Checksum");
    +core::Property ContentHash::HashAlgorithm("Hash Algorithm", "Name of the algorithm used to generate checksum", "MD5");
    +core::Relationship ContentHash::Success("success", "success operational on the flow record");
    +
    +void ContentHash::initialize() {
    +  //! Set the supported properties
    +  std::set<core::Property> properties;
    +  properties.insert(HashAttribute);
    +  properties.insert(HashAlgorithm);
    +  setSupportedProperties(properties);
    +  //! Set the supported relationships
    +  std::set<core::Relationship> relationships;
    +  relationships.insert(Success);
    +  setSupportedRelationships(relationships);
    +}
    +
    +void ContentHash::onTrigger(core::ProcessContext *context, core::ProcessSession *session) {
    +  std::shared_ptr<core::FlowFile> flowFile = session->get();
    +
    +  if (!flowFile) {
    +    return;
    +  }
    +
    +  ReadCallback cb(flowFile, context);
    +  session->read(flowFile, &cb);
    +  session->transfer(flowFile, Success);
    +}
    +
    +int64_t ContentHash::ReadCallback::process(std::shared_ptr<io::BaseStream> stream) {
    +  std::string attrKey, algoName;
    +  ctx_->getProperty(HashAttribute.getName(), attrKey);
    +  ctx_->getProperty(HashAlgorithm.getName(), algoName);
    +  std::transform(algoName.begin(), algoName.end(), algoName.begin(), ::toupper);
    +
    +  // Erase '-' to make sha-256 and sha-2 work, too
    +  algoName.erase(std::remove(algoName.begin(), algoName.end(), '-'), algoName.end());
    +
    +  // This throws in case algo is not found, but that's fine
    --- End diff --
    
    Failure is a generic term and is generally up to the individual processor to define -- your definition is not true across all processors in NiFi, so I'm fine leaving that up to the author to decide in this case. However, is the empty content case accounted for within the processor? Some have perceived empty content hashing as a potential failure case. We can make that a follow-on task, but we should probably reach parity with NiFi eventually.


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by arpadboda <gi...@git.apache.org>.
Github user arpadboda commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235710768
  
    --- Diff: libminifi/src/processors/ContentHash.cpp ---
    @@ -0,0 +1,100 @@
    +/**
    + * @file ContentHash.cpp
    + * ContentHash class implementation
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <algorithm>
    +#include <iostream>
    +#include <memory>
    +#include <string>
    +#include "processors/ContentHash.h"
    +#include "core/ProcessContext.h"
    +#include "core/ProcessSession.h"
    +#include "core/FlowFile.h"
    +
    +namespace org {
    +namespace apache {
    +namespace nifi {
    +namespace minifi {
    +namespace processors {
    +
    +core::Property ContentHash::HashAttribute("Hash Attribute", "Attribute to store checksum to", "Checksum");
    +core::Property ContentHash::HashAlgorithm("Hash Algorithm", "Name of the algorithm used to generate checksum", "MD5");
    +core::Relationship ContentHash::Success("success", "success operational on the flow record");
    +
    +void ContentHash::initialize() {
    +  //! Set the supported properties
    +  std::set<core::Property> properties;
    +  properties.insert(HashAttribute);
    +  properties.insert(HashAlgorithm);
    +  setSupportedProperties(properties);
    +  //! Set the supported relationships
    +  std::set<core::Relationship> relationships;
    +  relationships.insert(Success);
    +  setSupportedRelationships(relationships);
    +}
    +
    +void ContentHash::onTrigger(core::ProcessContext *context, core::ProcessSession *session) {
    +  std::shared_ptr<core::FlowFile> flowFile = session->get();
    +
    +  if (!flowFile) {
    +    return;
    +  }
    +
    +  ReadCallback cb(flowFile, context);
    +  session->read(flowFile, &cb);
    +  session->transfer(flowFile, Success);
    +}
    +
    +int64_t ContentHash::ReadCallback::process(std::shared_ptr<io::BaseStream> stream) {
    +  std::string attrKey, algoName;
    +  ctx_->getProperty(HashAttribute.getName(), attrKey);
    +  ctx_->getProperty(HashAlgorithm.getName(), algoName);
    +  std::transform(algoName.begin(), algoName.end(), algoName.begin(), ::toupper);
    +
    +  // Erase '-' to make sha-256 and sha-2 work, too
    +  algoName.erase(std::remove(algoName.begin(), algoName.end(), '-'), algoName.end());
    +
    +  // This throws in case algo is not found, but that's fine
    +  auto algo = HashAlgos.at(algoName);
    +
    +  const auto& ret_val = algo(stream);
    +
    +  if (ret_val.second <= 0) {
    --- End diff --
    
    The return code was originally introduced to meet the requirements of readCallback: its int64_t return value expects the number of bytes read to be returned.
    The condition here can be removed; we can stamp the empty hash as well.


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by phrocker <gi...@git.apache.org>.
Github user phrocker commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235474428
  
    --- Diff: libminifi/include/processors/ContentHash.h ---
    @@ -0,0 +1,186 @@
    +/**
    + * @file ContentHash.h
    + * ContentHash class declaration
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +#ifndef NIFI_MINIFI_CPP_CONTENTHASH_H
    +#define NIFI_MINIFI_CPP_CONTENTHASH_H
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <iomanip>
    +#include <map>
    +#include <memory>
    +#include <string>
    +#include <sstream>
    +#include <utility>
    +#include <stdint.h>
    +
    +#include <openssl/md5.h>
    +#include <openssl/sha.h>
    +
    +#include "FlowFileRecord.h"
    +#include "core/Processor.h"
    +#include "core/ProcessSession.h"
    +#include "core/Resource.h"
    +#include "io/BaseStream.h"
    +
    +using HashReturnType = std::pair<std::string, int64_t>;
    +
    +namespace {
    +#define HASH_BUFFER_SIZE 16384
    +
    +  std::string digestToString(const unsigned char * const digest, size_t size) {
    +    std::stringstream ss;
    +    for(int i = 0; i < size; i++)
    +    {
    +      ss << std::uppercase << std::hex << std::setw(2) << std::setfill('0') << (int)digest[i];
    +    }
    +    return ss.str();
    +  }
    +
    +  HashReturnType MD5Hash(std::shared_ptr<org::apache::nifi::minifi::io::BaseStream> stream) {
    +    HashReturnType ret_val;
    +    ret_val.second = 0;
    +    uint8_t buffer[HASH_BUFFER_SIZE];
    +    MD5_CTX context;
    +    MD5_Init(&context);
    +
    +    size_t ret = 0;
    +    do {
    +      ret = stream->readData(buffer, HASH_BUFFER_SIZE);
    +      if(ret > 0) {
    +        MD5_Update(&context, buffer, ret);
    +        ret_val.second += ret;
    +      }
    +    } while(ret > 0);
    +    unsigned char digest[MD5_DIGEST_LENGTH];
    +    MD5_Final(digest, &context);
    +
    +    ret_val.first = digestToString(digest, MD5_DIGEST_LENGTH);
    +    return ret_val;
    +  }
    +
    +  HashReturnType SHA1Hash(std::shared_ptr<org::apache::nifi::minifi::io::BaseStream> stream) {
    +    HashReturnType ret_val;
    +    ret_val.second = 0;
    +    uint8_t buffer[HASH_BUFFER_SIZE];
    +    SHA_CTX context;
    +    SHA1_Init(&context);
    +
    +    size_t ret = 0;
    +    do {
    +      ret = stream->readData(buffer, HASH_BUFFER_SIZE);
    +      if(ret > 0) {
    +        SHA1_Update(&context, buffer, ret);
    +        ret_val.second += ret;
    +      }
    +    } while(ret > 0);
    +    unsigned char digest[SHA_DIGEST_LENGTH];
    +    SHA1_Final(digest, &context);
    +
    +    ret_val.first = digestToString(digest, SHA_DIGEST_LENGTH);
    +    return ret_val;
    +  }
    +
    +  HashReturnType SHA256Hash(std::shared_ptr<org::apache::nifi::minifi::io::BaseStream> stream) {
    +    HashReturnType ret_val;
    +    ret_val.second = 0;
    +    uint8_t buffer[HASH_BUFFER_SIZE];
    +    SHA256_CTX context;
    +    SHA256_Init(&context);
    +
    +    size_t ret ;
    +    do {
    +      ret = stream->readData(buffer, HASH_BUFFER_SIZE);
    +      if(ret > 0) {
    +        SHA256_Update(&context, buffer, ret);
    +        ret_val.second += ret;
    +      }
    +    } while(ret > 0);
    +    unsigned char digest[SHA256_DIGEST_LENGTH];
    +    SHA256_Final(digest, &context);
    +
    +    ret_val.first = digestToString(digest, SHA256_DIGEST_LENGTH);
    +    return ret_val;
    +  }
    +}
    +
    +
    +namespace org {
    +namespace apache {
    +namespace nifi {
    +namespace minifi {
    +namespace processors {
    +
    +static const std::map<std::string, const std::function<HashReturnType(std::shared_ptr<io::BaseStream>)>> HashAlgos =
    --- End diff --
    
    SHA-2 is a suite of digests (a set of functions). It's not tantamount to SHA-256, as it includes others. We should probably avoid using that name, as it's not a ubiquitous understanding.


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by arpadboda <gi...@git.apache.org>.
Github user arpadboda commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235707469
  
    --- Diff: PROCESSORS.md ---
    @@ -130,6 +131,31 @@ default values, and whether a property supports the NiFi Expression Language.
     | success | All FlowFiles are routed to this relationship. |
     
     
    +## ContentHash
    +
    +### Description
    +
    +ContentHash calculates the checksum of the content of the flowfile and adds it as an attribute.
    --- End diff --
    
    Ok, renaming. 


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by asfgit <gi...@git.apache.org>.
Github user asfgit closed the pull request at:

    https://github.com/apache/nifi-minifi-cpp/pull/445


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by phrocker <gi...@git.apache.org>.
Github user phrocker commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235461991
  
    --- Diff: libminifi/include/processors/ContentHash.h ---
    @@ -0,0 +1,186 @@
    +/**
    + * @file ContentHash.h
    + * ContentHash class declaration
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +#ifndef NIFI_MINIFI_CPP_CONTENTHASH_H
    +#define NIFI_MINIFI_CPP_CONTENTHASH_H
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <iomanip>
    +#include <map>
    +#include <memory>
    +#include <string>
    +#include <sstream>
    +#include <utility>
    +#include <stdint.h>
    +
    +#include <openssl/md5.h>
    +#include <openssl/sha.h>
    +
    +#include "FlowFileRecord.h"
    +#include "core/Processor.h"
    +#include "core/ProcessSession.h"
    +#include "core/Resource.h"
    +#include "io/BaseStream.h"
    +
    +using HashReturnType = std::pair<std::string, int64_t>;
    +
    +namespace {
    +#define HASH_BUFFER_SIZE 16384
    +
    +  std::string digestToString(const unsigned char * const digest, size_t size) {
    +    std::stringstream ss;
    +    for(int i = 0; i < size; i++)
    +    {
    +      ss << std::uppercase << std::hex << std::setw(2) << std::setfill('0') << (int)digest[i];
    +    }
    +    return ss.str();
    +  }
    +
    +  HashReturnType MD5Hash(std::shared_ptr<org::apache::nifi::minifi::io::BaseStream> stream) {
    +    HashReturnType ret_val;
    +    ret_val.second = 0;
    +    uint8_t buffer[HASH_BUFFER_SIZE];
    +    MD5_CTX context;
    +    MD5_Init(&context);
    +
    +    size_t ret = 0;
    +    do {
    +      ret = stream->readData(buffer, HASH_BUFFER_SIZE);
    +      if(ret > 0) {
    +        MD5_Update(&context, buffer, ret);
    +        ret_val.second += ret;
    +      }
    +    } while(ret > 0);
    +    unsigned char digest[MD5_DIGEST_LENGTH];
    +    MD5_Final(digest, &context);
    +
    +    ret_val.first = digestToString(digest, MD5_DIGEST_LENGTH);
    +    return ret_val;
    +  }
    +
    +  HashReturnType SHA1Hash(std::shared_ptr<org::apache::nifi::minifi::io::BaseStream> stream) {
    +    HashReturnType ret_val;
    +    ret_val.second = 0;
    +    uint8_t buffer[HASH_BUFFER_SIZE];
    +    SHA_CTX context;
    +    SHA1_Init(&context);
    +
    +    size_t ret = 0;
    +    do {
    +      ret = stream->readData(buffer, HASH_BUFFER_SIZE);
    +      if(ret > 0) {
    +        SHA1_Update(&context, buffer, ret);
    +        ret_val.second += ret;
    +      }
    +    } while(ret > 0);
    +    unsigned char digest[SHA_DIGEST_LENGTH];
    +    SHA1_Final(digest, &context);
    +
    +    ret_val.first = digestToString(digest, SHA_DIGEST_LENGTH);
    +    return ret_val;
    +  }
    +
    --- End diff --
    
    Can these functions be combined to reduce duplication? 


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by phrocker <gi...@git.apache.org>.
Github user phrocker commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235458978
  
    --- Diff: PROCESSORS.md ---
    @@ -130,6 +131,31 @@ default values, and whether a property supports the NiFi Expression Language.
     | success | All FlowFiles are routed to this relationship. |
     
     
    +## ContentHash
    +
    +### Description
    +
    +ContentHash calculates the checksum of the content of the flowfile and adds it as an attribute.
    --- End diff --
    
    In the NiFi ecosystem, processors are named VerbNoun, so this would more appropriately be "HashContent".


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by arpadboda <gi...@git.apache.org>.
Github user arpadboda commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235706224
  
    --- Diff: libminifi/include/processors/ContentHash.h ---
    @@ -0,0 +1,186 @@
    +/**
    + * @file ContentHash.h
    + * ContentHash class declaration
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +#ifndef NIFI_MINIFI_CPP_CONTENTHASH_H
    +#define NIFI_MINIFI_CPP_CONTENTHASH_H
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <iomanip>
    +#include <map>
    +#include <memory>
    +#include <string>
    +#include <sstream>
    +#include <utility>
    +#include <stdint.h>
    +
    +#include <openssl/md5.h>
    +#include <openssl/sha.h>
    +
    +#include "FlowFileRecord.h"
    +#include "core/Processor.h"
    +#include "core/ProcessSession.h"
    +#include "core/Resource.h"
    +#include "io/BaseStream.h"
    +
    +using HashReturnType = std::pair<std::string, int64_t>;
    +
    +namespace {
    +#define HASH_BUFFER_SIZE 16384
    +
    +  std::string digestToString(const unsigned char * const digest, size_t size) {
    +    std::stringstream ss;
    +    for(int i = 0; i < size; i++)
    +    {
    +      ss << std::uppercase << std::hex << std::setw(2) << std::setfill('0') << (int)digest[i];
    +    }
    +    return ss.str();
    +  }
    +
    +  HashReturnType MD5Hash(std::shared_ptr<org::apache::nifi::minifi::io::BaseStream> stream) {
    +    HashReturnType ret_val;
    +    ret_val.second = 0;
    +    uint8_t buffer[HASH_BUFFER_SIZE];
    +    MD5_CTX context;
    +    MD5_Init(&context);
    +
    +    size_t ret = 0;
    +    do {
    +      ret = stream->readData(buffer, HASH_BUFFER_SIZE);
    +      if(ret > 0) {
    +        MD5_Update(&context, buffer, ret);
    +        ret_val.second += ret;
    +      }
    +    } while(ret > 0);
    +    unsigned char digest[MD5_DIGEST_LENGTH];
    +    MD5_Final(digest, &context);
    +
    +    ret_val.first = digestToString(digest, MD5_DIGEST_LENGTH);
    +    return ret_val;
    +  }
    +
    +  HashReturnType SHA1Hash(std::shared_ptr<org::apache::nifi::minifi::io::BaseStream> stream) {
    +    HashReturnType ret_val;
    +    ret_val.second = 0;
    +    uint8_t buffer[HASH_BUFFER_SIZE];
    +    SHA_CTX context;
    +    SHA1_Init(&context);
    +
    +    size_t ret = 0;
    +    do {
    +      ret = stream->readData(buffer, HASH_BUFFER_SIZE);
    +      if(ret > 0) {
    +        SHA1_Update(&context, buffer, ret);
    +        ret_val.second += ret;
    +      }
    +    } while(ret > 0);
    +    unsigned char digest[SHA_DIGEST_LENGTH];
    +    SHA1_Final(digest, &context);
    +
    +    ret_val.first = digestToString(digest, SHA_DIGEST_LENGTH);
    +    return ret_val;
    +  }
    +
    +  HashReturnType SHA256Hash(std::shared_ptr<org::apache::nifi::minifi::io::BaseStream> stream) {
    +    HashReturnType ret_val;
    +    ret_val.second = 0;
    +    uint8_t buffer[HASH_BUFFER_SIZE];
    +    SHA256_CTX context;
    +    SHA256_Init(&context);
    +
    +    size_t ret ;
    +    do {
    +      ret = stream->readData(buffer, HASH_BUFFER_SIZE);
    +      if(ret > 0) {
    +        SHA256_Update(&context, buffer, ret);
    +        ret_val.second += ret;
    +      }
    +    } while(ret > 0);
    +    unsigned char digest[SHA256_DIGEST_LENGTH];
    +    SHA256_Final(digest, &context);
    +
    +    ret_val.first = digestToString(digest, SHA256_DIGEST_LENGTH);
    +    return ret_val;
    +  }
    +}
    +
    +
    +namespace org {
    +namespace apache {
    +namespace nifi {
    +namespace minifi {
    +namespace processors {
    +
    +static const std::map<std::string, const std::function<HashReturnType(std::shared_ptr<io::BaseStream>)>> HashAlgos =
    --- End diff --
    
    SHA-256 is often referred to as SHA2, but I admit that's not strictly correct. Removed.


---

[GitHub] nifi-minifi-cpp pull request #445: MINIFICPP-681 - Add content hash processo...

Posted by arpadboda <gi...@git.apache.org>.
Github user arpadboda commented on a diff in the pull request:

    https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235710954
  
    --- Diff: libminifi/include/processors/ContentHash.h ---
    @@ -0,0 +1,186 @@
    +/**
    + * @file ContentHash.h
    + * ContentHash class declaration
    + *
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +#ifndef NIFI_MINIFI_CPP_CONTENTHASH_H
    +#define NIFI_MINIFI_CPP_CONTENTHASH_H
    +
    +#ifdef OPENSSL_SUPPORT
    +
    +#include <iomanip>
    +#include <map>
    +#include <memory>
    +#include <string>
    +#include <sstream>
    +#include <utility>
    +#include <stdint.h>
    +
    +#include <openssl/md5.h>
    +#include <openssl/sha.h>
    +
    +#include "FlowFileRecord.h"
    +#include "core/Processor.h"
    +#include "core/ProcessSession.h"
    +#include "core/Resource.h"
    +#include "io/BaseStream.h"
    +
    +using HashReturnType = std::pair<std::string, int64_t>;
    +
    +namespace {
    +#define HASH_BUFFER_SIZE 16384
    +
    +  std::string digestToString(const unsigned char * const digest, size_t size) {
    +    std::stringstream ss;
    +    for(int i = 0; i < size; i++)
    +    {
    +      ss << std::uppercase << std::hex << std::setw(2) << std::setfill('0') << (int)digest[i];
    +    }
    +    return ss.str();
    +  }
    +
    +  HashReturnType MD5Hash(std::shared_ptr<org::apache::nifi::minifi::io::BaseStream> stream) {
    +    HashReturnType ret_val;
    +    ret_val.second = 0;
    +    uint8_t buffer[HASH_BUFFER_SIZE];
    +    MD5_CTX context;
    +    MD5_Init(&context);
    +
    +    size_t ret = 0;
    +    do {
    +      ret = stream->readData(buffer, HASH_BUFFER_SIZE);
    +      if(ret > 0) {
    +        MD5_Update(&context, buffer, ret);
    +        ret_val.second += ret;
    --- End diff --
    
    As per the previous comment, ret is to be used as the return value of the readCallback.


---