You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by ct...@apache.org on 2022/02/21 05:36:23 UTC

[accumulo] branch main updated: Automatically check for unapproved chars (#2514)

This is an automated email from the ASF dual-hosted git repository.

ctubbsii pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/main by this push:
     new 0ede27e  Automatically check for unapproved chars (#2514)
0ede27e is described below

commit 0ede27e69406e26a7a5e8dc4e3bf116ee79712d1
Author: Christopher Tubbs <ct...@apache.org>
AuthorDate: Mon Feb 21 00:35:24 2022 -0500

    Automatically check for unapproved chars (#2514)
    
    * Set up GitHub Actions to check for unapproved non-ASCII characters
    * Replace unnecessary multi-byte quote characters with ASCII quotes
---
 .github/workflows/maven.yaml                       |  2 +
 assemble/src/main/resources/LICENSE                |  2 +-
 contrib/ci/find-unapproved-chars.sh                | 52 ++++++++++++++++++++++
 .../core/iterators/user/TransformingIterator.java  |  4 +-
 4 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/maven.yaml b/.github/workflows/maven.yaml
index 701665c..bbb8f89 100644
--- a/.github/workflows/maven.yaml
+++ b/.github/workflows/maven.yaml
@@ -49,6 +49,8 @@ jobs:
         restore-keys: ${{ runner.os }}-m2
     - name: Show the first log message
       run: git log -n1
+    - name: Check for unapproved characters
+      run: contrib/ci/find-unapproved-chars.sh
     - name: Build with Maven (Fast Build)
       timeout-minutes: 20
       run: mvn -B -V -e -ntp "-Dstyle.color=always" clean package dependency:resolve -DskipTests -DskipFormat -DverifyFormat
diff --git a/assemble/src/main/resources/LICENSE b/assemble/src/main/resources/LICENSE
index eaf71a4..16ef198 100644
--- a/assemble/src/main/resources/LICENSE
+++ b/assemble/src/main/resources/LICENSE
@@ -561,7 +561,7 @@ to the terms and conditions of the following licenses.
           be used to endorse or promote products derived from this software without
           specific prior written permission.
 
-    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
     EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
diff --git a/contrib/ci/find-unapproved-chars.sh b/contrib/ci/find-unapproved-chars.sh
new file mode 100755
index 0000000..fa48b30
--- /dev/null
+++ b/contrib/ci/find-unapproved-chars.sh
@@ -0,0 +1,52 @@
+#! /usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# This CI script ensures that a pull request does not unintentionally or
+# maliciously add new non-ASCII characters, unless they are preapproved in
+# ALLOWED or appear in known binary or resource files excluded from the scan.
+NUM_EXPECTED=0 # exact number of unapproved characters expected in the tree
+ALLOWED='©èö🐈三四五六八九十' # non-ASCII characters exempt from the check
+
+function findallnonascii() { # $1: pass 'print' to list matches with file name and line number
+  # -P for Perl-compatible regex; -o prints each match on its own line, so output lines count occurrences
+  local opts='-Po'
+  if [[ $1 == 'print' ]]; then
+    # -P for Perl-compatible regex, -H to always show filenames, -n to show line numbers
+    opts='-PHn'
+  fi
+  find . -type f \
+    -not -path '*/\.git/*' \
+    -not -path '*/monitor/resources/external/*' \
+    -not -path '*/tserver/src/test/resources/walog-from-14/*' \
+    -not -regex '.*[.]\(png\|jar\|rf\|jceks\|walog\)$' \
+    -exec grep "$opts" "[^[:ascii:]$ALLOWED]" {} + # match any character that is neither ASCII nor in ALLOWED
+}
+
+function comparecounts() { # returns 1 (after printing the matches) when the count differs from NUM_EXPECTED
+  local count; count=$(findallnonascii | wc -l) # one output line per match in the default (-o) mode
+  if [[ $NUM_EXPECTED -ne $count ]]; then
+    echo "Expected $NUM_EXPECTED, but found $count unapproved non-ASCII characters:"
+    findallnonascii 'print' # second pass, this time showing file names and line numbers
+    return 1
+  fi
+}
+
+comparecounts && echo "Found exactly $NUM_EXPECTED unapproved non-ASCII characters, as expected" # on mismatch the && list (last command) yields comparecounts' status 1
+
diff --git a/core/src/main/java/org/apache/accumulo/core/iterators/user/TransformingIterator.java b/core/src/main/java/org/apache/accumulo/core/iterators/user/TransformingIterator.java
index a6e41f1..bc873f8 100644
--- a/core/src/main/java/org/apache/accumulo/core/iterators/user/TransformingIterator.java
+++ b/core/src/main/java/org/apache/accumulo/core/iterators/user/TransformingIterator.java
@@ -228,7 +228,7 @@ public abstract class TransformingIterator extends WrappingIterator implements O
       keyPos++;
 
     // If we emptied out the transformed key map then transform the next key
-    // set from the source. It’s possible that transformation could produce keys
+    // set from the source. It's possible that transformation could produce keys
     // that are outside of our range or are not visible to the end user, so after the
     // call below we might not have added any keys to the map. Keep going until
     // we either get some keys in the map or exhaust the source iterator.
@@ -252,7 +252,7 @@ public abstract class TransformingIterator extends WrappingIterator implements O
     // Range clipping could cause us to trim out all the keys we transformed.
     // Keep looping until we either have some keys in the output range, or have
     // exhausted the source iterator.
-    keyPos = -1; // “Clear” list so hasTop returns false to get us into the loop (transformKeys
+    keyPos = -1; // "Clear" list so hasTop returns false to get us into the loop (transformKeys
                  // actually clears)
     while (!hasTop() && super.hasTop()) {
       // Build up a sorted list of all keys for the same prefix. When