You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@linkis.apache.org by pe...@apache.org on 2022/01/27 05:03:50 UTC

[incubator-linkis] branch dev-1.1.0-datasource updated: upgrade poi and excel-streaming-reader (#1309)

This is an automated email from the ASF dual-hosted git repository.

peacewong pushed a commit to branch dev-1.1.0-datasource
in repository https://gitbox.apache.org/repos/asf/incubator-linkis.git


The following commit(s) were added to refs/heads/dev-1.1.0-datasource by this push:
     new 5aea85b  upgrade poi and excel-streaming-reader (#1309)
5aea85b is described below

commit 5aea85b7c0ac2d4523b375340c5aa1e16826aede
Author: PJ Fanning <pj...@users.noreply.github.com>
AuthorDate: Thu Jan 27 06:03:44 2022 +0100

    upgrade poi and excel-streaming-reader (#1309)
    
    * upgrade poi and excel-streaming-reader
    
    * tune xlsx streaming reader
    
    * Update XlsxUtils.java
    
    * curvesapi version
---
 LICENSE-binary                                     |  6 +-
 licenses-binary/LICENSE-poi-ooxml-schemas.txt      |  2 +-
 licenses-binary/LICENSE-poi-ooxml.txt              |  2 +-
 licenses-binary/LICENSE-poi.txt                    |  2 +-
 linkis-commons/linkis-storage/pom.xml              | 12 +---
 .../org/apache/linkis/storage/excel/XlsxUtils.java | 76 +++++++++++-----------
 .../engineconn-plugins/flink/pom.xml               |  4 +-
 .../engineconn-plugins/spark/pom.xml               |  4 +-
 tool/dependencies/known-dependencies.txt           |  9 ++-
 tool/dependencies/third-party-dependencies.txt     |  9 ++-
 10 files changed, 60 insertions(+), 66 deletions(-)

diff --git a/LICENSE-binary b/LICENSE-binary
index d568f8f..f25561a 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -284,7 +284,7 @@ See licenses-binary/ for text of these licenses.
     (Apache License, Version 2.0) Apache Log4j JUL Adapter (org.apache.logging.log4j:log4j-jul:2.17.0 - https://logging.apache.org/log4j/2.x/log4j-jul/)
     (Apache License, Version 2.0) Apache Log4j SLF4J Binding (org.apache.logging.log4j:log4j-slf4j-impl:2.17.0 - https://logging.apache.org/log4j/2.x/log4j-slf4j-impl/)
     (Apache License, Version 2.0) Apache Log4j Web (org.apache.logging.log4j:log4j-web:2.6.2 - http://logging.apache.org/log4j/2.x/log4j-web/)
-    (Apache License, Version 2.0) Apache POI (org.apache.poi:poi-ooxml-schemas:3.17 - http://poi.apache.org/)
+    (Apache License, Version 2.0) Apache POI (org.apache.poi:poi-ooxml-lite:5.2.0 - http://poi.apache.org/)
     (Apache License, Version 2.0) Apache Velocity (org.apache.velocity:velocity:1.5 - http://velocity.apache.org/engine/releases/velocity-1.5/)
     (Apache License, Version 2.0) ApacheDS I18n (org.apache.directory.server:apacheds-i18n:2.0.0-M15 - http://directory.apache.org/apacheds/1.5/apacheds-i18n)
     (Apache License, Version 2.0) ApacheDS Protocol Kerberos Codec (org.apache.directory.server:apacheds-kerberos-codec:2.0.0-M15 - http://directory.apache.org/apacheds/1.5/apacheds-kerberos-codec)
@@ -407,7 +407,7 @@ See licenses-binary/ for text of these licenses.
     (Apache License, Version 2.0) Spring Web MVC (org.springframework:spring-webmvc:5.2.2.RELEASE - https://github.com/spring-projects/spring-framework)
     (Apache License, Version 2.0) Spring WebFlux (org.springframework:spring-webflux:5.2.2.RELEASE - https://github.com/spring-projects/spring-framework)
     (Apache License, Version 2.0) StAX API (stax:stax-api:1.0.1 - http://stax.codehaus.org/)
-    (Apache License, Version 2.0) Streaming Excel reader (com.monitorjbl:xlsx-streamer:1.2.1 - https://github.com/monitorjbl/excel-streaming-reader)
+    (Apache License, Version 2.0) Streaming Excel reader (com.github.pjfanning:excel-streaming-reader:3.4.0 - https://github.com/pjfanning/excel-streaming-reader)
     (Apache License, Version 2.0) Woodstox (com.fasterxml.woodstox:woodstox-core:5.2.1 - https://github.com/FasterXML/woodstox)
     (Apache License, Version 2.0) XML Commons Resolver Component (xml-resolver:xml-resolver:1.2 - http://xml.apache.org/commons/components/resolver/)
     (Apache License, Version 2.0) Xerces2 Java (com.rackspace.apache:xerces2-xsd11:2.11.1 - http://xerces.apache.org/xerces2-j/)
@@ -526,7 +526,7 @@ See licenses-binary/ for text of these licenses.
     (The 2-Clause BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/)
     (The 2-Clause BSD License) JLine (jline:jline:2.12 - http://nexus.sonatype.org/oss-repository-hosting.html/jline)
     (The 2-Clause BSD License) Stax2 API (org.codehaus.woodstox:stax2-api:4.2 - http://github.com/FasterXML/stax2-api)
-    (The 2-Clause BSD License) curvesapi (com.github.virtuald:curvesapi:1.04 - https://github.com/virtuald/curvesapi)
+    (The 2-Clause BSD License) curvesapi (com.github.virtuald:curvesapi:1.06 - https://github.com/virtuald/curvesapi)
     (The 2-Clause BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net)
     (The 3-Clause BSD License) (WTFPL) Reflections (org.reflections:reflections:0.9.10 - http://github.com/ronmamo/reflections)
     (The 3-Clause BSD License) Commons Compiler (org.codehaus.janino:commons-compiler:2.7.6 - http://docs.codehaus.org/display/JANINO/Home/commons-compiler)
diff --git a/licenses-binary/LICENSE-poi-ooxml-schemas.txt b/licenses-binary/LICENSE-poi-ooxml-schemas.txt
index 3b63d08..6510e8e 100644
--- a/licenses-binary/LICENSE-poi-ooxml-schemas.txt
+++ b/licenses-binary/LICENSE-poi-ooxml-schemas.txt
@@ -209,7 +209,7 @@ license terms. Your use of these subcomponents is subject to the terms
 and conditions of the following licenses:
 
 
-Office Open XML schemas (ooxml-schemas-1.*.jar)
+Office Open XML schemas (poi-ooxml-lite*.jar)
 
     The Office Open XML schema definitions used by Apache POI are
     a part of the Office Open XML ECMA Specification (ECMA-376, [1]).
diff --git a/licenses-binary/LICENSE-poi-ooxml.txt b/licenses-binary/LICENSE-poi-ooxml.txt
index 3b63d08..6510e8e 100644
--- a/licenses-binary/LICENSE-poi-ooxml.txt
+++ b/licenses-binary/LICENSE-poi-ooxml.txt
@@ -209,7 +209,7 @@ license terms. Your use of these subcomponents is subject to the terms
 and conditions of the following licenses:
 
 
-Office Open XML schemas (ooxml-schemas-1.*.jar)
+Office Open XML schemas (poi-ooxml-lite*.jar)
 
     The Office Open XML schema definitions used by Apache POI are
     a part of the Office Open XML ECMA Specification (ECMA-376, [1]).
diff --git a/licenses-binary/LICENSE-poi.txt b/licenses-binary/LICENSE-poi.txt
index 3b63d08..6510e8e 100644
--- a/licenses-binary/LICENSE-poi.txt
+++ b/licenses-binary/LICENSE-poi.txt
@@ -209,7 +209,7 @@ license terms. Your use of these subcomponents is subject to the terms
 and conditions of the following licenses:
 
 
-Office Open XML schemas (ooxml-schemas-1.*.jar)
+Office Open XML schemas (poi-ooxml-lite*.jar)
 
     The Office Open XML schema definitions used by Apache POI are
     a part of the Office Open XML ECMA Specification (ECMA-376, [1]).
diff --git a/linkis-commons/linkis-storage/pom.xml b/linkis-commons/linkis-storage/pom.xml
index 092bb78..d0b0c0e 100644
--- a/linkis-commons/linkis-storage/pom.xml
+++ b/linkis-commons/linkis-storage/pom.xml
@@ -68,15 +68,9 @@
 <!--            <version>2.0</version>-->
 <!--        </dependency>-->
         <dependency>
-            <groupId>com.monitorjbl</groupId>
-            <artifactId>xlsx-streamer</artifactId>
-            <version>1.2.1</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>com.fasterxml.jackson.core</groupId>
-                    <artifactId>jackson-databind</artifactId>
-                </exclusion>
-            </exclusions>
+            <groupId>com.github.pjfanning</groupId>
+            <artifactId>excel-streaming-reader</artifactId>
+            <version>3.4.0</version>
         </dependency>
 
     </dependencies>
diff --git a/linkis-commons/linkis-storage/src/main/java/org/apache/linkis/storage/excel/XlsxUtils.java b/linkis-commons/linkis-storage/src/main/java/org/apache/linkis/storage/excel/XlsxUtils.java
index 499ee21..549c0c6 100644
--- a/linkis-commons/linkis-storage/src/main/java/org/apache/linkis/storage/excel/XlsxUtils.java
+++ b/linkis-commons/linkis-storage/src/main/java/org/apache/linkis/storage/excel/XlsxUtils.java
@@ -17,7 +17,7 @@
  
 package org.apache.linkis.storage.excel;
 
-import com.monitorjbl.xlsx.StreamingReader;
+import com.github.pjfanning.xlsx.StreamingReader;
 import org.apache.poi.ss.usermodel.Cell;
 import org.apache.poi.ss.usermodel.Row;
 import org.apache.poi.ss.usermodel.Sheet;
@@ -33,45 +33,47 @@ import java.util.List;
 public class XlsxUtils {
 
     public static List<List<String>> getBasicInfo(InputStream inputStream, File file) throws Exception{
-        List<List<String>> res = new ArrayList<>();
-        Workbook wb = null;
-        if(inputStream != null){
-            wb = StreamingReader.builder()
-                    .sstCacheSize(100)
-                    .rowCacheSize(2)    // number of rows to keep in memory (defaults to 10)
-                    .open(inputStream);
-        } else {
-            wb = StreamingReader.builder()
-                    .sstCacheSize(100)
-                    .rowCacheSize(2)    // number of rows to keep in memory (defaults to 10)
-                    .open(file);
-        }
-        List<String> sheetNames = new ArrayList<>();
-        for(Sheet sheet : wb){
-            sheetNames.add(sheet.getSheetName());
-        }
+        try {
+            List<List<String>> res = new ArrayList<>();
+            Workbook wb = null;
+            if(inputStream != null){
+                wb = StreamingReader.builder()
+                        .rowCacheSize(2)    // number of rows to keep in memory (defaults to 10)
+                        .setUseSstReadOnly(true)
+                        .open(inputStream);
+            } else {
+                wb = StreamingReader.builder()
+                        .rowCacheSize(2)    // number of rows to keep in memory (defaults to 10)
+                        .setUseSstReadOnly(true)
+                        .open(file);
+            }
+            List<String> sheetNames = new ArrayList<>();
+            for(Sheet sheet : wb){
+                sheetNames.add(sheet.getSheetName());
+            }
 
-        Sheet sheet = wb.getSheetAt(0);
-        Iterator<Row> iterator = sheet.iterator();
-        Row row = null;
-        while(iterator.hasNext() && row == null){
-            row = iterator.next();
-        }
+            Sheet sheet = wb.getSheetAt(0);
+            Iterator<Row> iterator = sheet.iterator();
+            Row row = null;
+            while(iterator.hasNext() && row == null){
+                row = iterator.next();
+            }
 
-        if(row == null){
-            throw new Exception("The incoming Excel file is empty(传入的Excel文件为空)");
-        }
+            if(row == null){
+                throw new Exception("The incoming Excel file is empty(传入的Excel文件为空)");
+            }
 
-        List<String> values = new ArrayList<>();
-        for(Cell cell:row){
-            values.add(cell.getStringCellValue());
-        }
-        if(inputStream != null){
-            inputStream.close();
+            List<String> values = new ArrayList<>();
+            for(Cell cell:row){
+                values.add(cell.getStringCellValue());
+            }
+            res.add(sheetNames);
+            res.add(values);
+            return res;
+        } finally {
+            if(inputStream != null){
+                inputStream.close();
+            }
         }
-        res.add(sheetNames);
-        res.add(values);
-        return res;
-
     }
 }
diff --git a/linkis-engineconn-plugins/engineconn-plugins/flink/pom.xml b/linkis-engineconn-plugins/engineconn-plugins/flink/pom.xml
index 5b206de..1f7defc 100644
--- a/linkis-engineconn-plugins/engineconn-plugins/flink/pom.xml
+++ b/linkis-engineconn-plugins/engineconn-plugins/flink/pom.xml
@@ -360,8 +360,8 @@
                     <groupId>io.netty</groupId>
                 </exclusion>
                 <exclusion>
-                    <artifactId>xlsx-streamer</artifactId>
-                    <groupId>com.monitorjbl</groupId>
+                    <groupId>com.github.pjfanning</groupId>
+                    <artifactId>excel-streaming-reader</artifactId>
                 </exclusion>
                 <exclusion>
                     <artifactId>netty-all</artifactId>
diff --git a/linkis-engineconn-plugins/engineconn-plugins/spark/pom.xml b/linkis-engineconn-plugins/engineconn-plugins/spark/pom.xml
index 49e59bf..e457a4e 100644
--- a/linkis-engineconn-plugins/engineconn-plugins/spark/pom.xml
+++ b/linkis-engineconn-plugins/engineconn-plugins/spark/pom.xml
@@ -89,8 +89,8 @@
                     <groupId>io.netty</groupId>
                 </exclusion>
                 <exclusion>
-                    <artifactId>xlsx-streamer</artifactId>
-                    <groupId>com.monitorjbl</groupId>
+                    <groupId>com.github.pjfanning</groupId>
+                    <artifactId>excel-streaming-reader</artifactId>
                 </exclusion>
                 <exclusion>
                     <artifactId>netty-all</artifactId>
diff --git a/tool/dependencies/known-dependencies.txt b/tool/dependencies/known-dependencies.txt
index 46bcebb..3d0638c 100644
--- a/tool/dependencies/known-dependencies.txt
+++ b/tool/dependencies/known-dependencies.txt
@@ -74,7 +74,7 @@ commons-text-1.6.jar
 curator-client-2.7.1.jar
 curator-framework-2.7.1.jar
 curator-recipes-2.7.1.jar
-curvesapi-1.04.jar
+curvesapi-1.06.jar
 datanucleus-core-4.1.17.jar
 dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar
 druid-0.2.9.jar
@@ -285,7 +285,6 @@ netty-transport-native-epoll-4.1.49.Final-linux-x86_64.jar
 netty-transport-native-unix-common-4.1.49.Final.jar
 nio-multipart-parser-1.1.0.jar
 nio-stream-storage-1.1.3.jar
-ooxml-schemas-1.3.jar
 orc-core-1.3.3.jar
 org.eclipse.wst.xml.xpath2.processor-2.1.100.jar
 oro-2.0.8.jar
@@ -293,9 +292,9 @@ osgi-resource-locator-1.0.1.jar
 pagehelper-5.1.4.jar
 paranamer-2.7.jar
 paranamer-2.8.jar
-poi-3.17.jar
-poi-ooxml-3.17.jar
-poi-ooxml-schemas-3.17.jar
+poi-5.2.0.jar
+poi-ooxml-5.2.0.jar
+poi-ooxml-lite-5.2.0.jar
 protobuf-java-2.5.0.jar
 protostuff-api-1.6.2.jar
 protostuff-collectionschema-1.6.2.jar
diff --git a/tool/dependencies/third-party-dependencies.txt b/tool/dependencies/third-party-dependencies.txt
index aa0bf29..bcf5673 100644
--- a/tool/dependencies/third-party-dependencies.txt
+++ b/tool/dependencies/third-party-dependencies.txt
@@ -74,7 +74,7 @@ commons-text-1.6.jar
 curator-client-2.7.1.jar
 curator-framework-2.7.1.jar
 curator-recipes-2.7.1.jar
-curvesapi-1.04.jar
+curvesapi-1.06.jar
 datanucleus-core-4.1.17.jar
 dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar
 druid-0.2.9.jar
@@ -285,7 +285,6 @@ netty-transport-native-epoll-4.1.49.Final-linux-x86_64.jar
 netty-transport-native-unix-common-4.1.49.Final.jar
 nio-multipart-parser-1.1.0.jar
 nio-stream-storage-1.1.3.jar
-ooxml-schemas-1.3.jar
 orc-core-1.3.3.jar
 org.eclipse.wst.xml.xpath2.processor-2.1.100.jar
 oro-2.0.8.jar
@@ -293,9 +292,9 @@ osgi-resource-locator-1.0.1.jar
 pagehelper-5.1.4.jar
 paranamer-2.7.jar
 paranamer-2.8.jar
-poi-3.17.jar
-poi-ooxml-3.17.jar
-poi-ooxml-schemas-3.17.jar
+poi-5.2.0.jar
+poi-ooxml-5.2.0.jar
+poi-ooxml-lite-5.2.0.jar
 protobuf-java-2.5.0.jar
 protostuff-api-1.6.2.jar
 protostuff-collectionschema-1.6.2.jar

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@linkis.apache.org
For additional commands, e-mail: commits-help@linkis.apache.org