You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@isis.apache.org by ah...@apache.org on 2022/12/07 08:11:13 UTC

[isis] branch master updated: ISIS-3302: [Commons] Text Util to properly handle Files with BOM Signature

This is an automated email from the ASF dual-hosted git repository.

ahuber pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/isis.git


The following commit(s) were added to refs/heads/master by this push:
     new 1dd8fb00f6 ISIS-3302: [Commons] Text Util to properly handle Files with BOM Signature
1dd8fb00f6 is described below

commit 1dd8fb00f6182ae387f0b2f34c0df61d1eb04323
Author: Andi Huber <ah...@apache.org>
AuthorDate: Wed Dec 7 09:11:05 2022 +0100

    ISIS-3302: [Commons] Text Util to properly handle Files with BOM
    Signature
---
 .../apache/causeway/commons/internal/base/_Text.java | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/commons/src/main/java/org/apache/causeway/commons/internal/base/_Text.java b/commons/src/main/java/org/apache/causeway/commons/internal/base/_Text.java
index a6556a578b..a86270a70d 100644
--- a/commons/src/main/java/org/apache/causeway/commons/internal/base/_Text.java
+++ b/commons/src/main/java/org/apache/causeway/commons/internal/base/_Text.java
@@ -109,8 +109,12 @@ public final class _Text {
         try(Scanner scanner = new Scanner(input, charset.name())){
             scanner.useDelimiter("\\n");
             while(scanner.hasNext()) {
-                val line = scanner.next();
-                lines.add(line.replace("\r", ""));
+                var line = scanner.next()
+                        .replace("\r", "");
+                if(lines.size()==0) {
+                    line = stripBom(line); // special handling of first line
+                }
+                lines.add(line);
             }
         }
         return Can.ofCollection(lines);
@@ -418,4 +422,16 @@ public final class _Text {
         return constraintLines.stream();
     }
 
+    /**
+     * If line has a leading BOM character (U+FEFF, decimal 65279), strips it; some UTF-8
+     * formatted files may have such a BOM signature at their start.
+     * @param line text to check; must be non-null, as it is dereferenced without a null check
+     * @return line without its first character if that is a BOM, otherwise line unchanged
+     */
+    private static String stripBom(final String line) {
+        if(line.length()>0 // guard: charAt(0) would throw on an empty string
+                && line.charAt(0)==65279) { // 65279 == 0xFEFF, the BOM code unit
+            return line.substring(1); // drops only the first character
+        }
+        return line;
+    }
 }