You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by "morningman (via GitHub)" <gi...@apache.org> on 2023/06/02 07:42:57 UTC
[GitHub] [doris] morningman commented on a diff in pull request #19819: [feature-wip](multi-catalog)(step2)support read max compute data by JNI

morningman commented on code in PR #19819:
URL: https://github.com/apache/doris/pull/19819#discussion_r1214018756


##########
fe/fe-core/src/main/java/org/apache/doris/planner/external/MaxComputeScanNode.java:
##########
@@ -74,7 +78,37 @@ protected Map<String, String> getLocationProperties() throws UserException {
     @Override
     protected List<Split> getSplits() throws UserException {
         List<Split> result = new ArrayList<>();
-        result.add(new FileSplit(new Path("/"), 0, -1, -1, 0L, new String[0], Collections.emptyList()));
+        // String splitPath = catalog.getTunnelUrl();
+        // TODO: use single max compute scan node rather than file scan node
+        com.aliyun.odps.Table odpsTable = table.getOdpsTable();
+        if (desc.getSlots().isEmpty() || odpsTable.getFileNum() <= 0) {
+            return result;
+        }
+        try {
+            List<Pair<Long, Long>> sliceRange = new ArrayList<>();
+            long totalRows = catalog.getTotalRows(table.getDbName(), table.getName());
+            long fileNum = odpsTable.getFileNum();
+            long splitSize = (long) Math.ceil((double) totalRows / fileNum);

Review Comment:
   If the `totalRows` is less than `fileNum`, the `splitSize` will be 0.



##########
fe/java-udf/src/main/java/org/apache/doris/jni/vec/VectorColumn.java:
##########
@@ -21,6 +21,8 @@
 import org.apache.doris.jni.utils.TypeNativeBytes;
 import org.apache.doris.jni.vec.ColumnType.Type;
 
+import org.apache.log4j.Logger;

Review Comment:
   Wrong logger



##########
fe/fe-core/src/main/java/org/apache/doris/planner/external/MaxComputeScanNode.java:
##########
@@ -74,7 +78,37 @@ protected Map<String, String> getLocationProperties() throws UserException {
     @Override
     protected List<Split> getSplits() throws UserException {
         List<Split> result = new ArrayList<>();
-        result.add(new FileSplit(new Path("/"), 0, -1, -1, 0L, new String[0], Collections.emptyList()));
+        // String splitPath = catalog.getTunnelUrl();
+        // TODO: use single max compute scan node rather than file scan node
+        com.aliyun.odps.Table odpsTable = table.getOdpsTable();
+        if (desc.getSlots().isEmpty() || odpsTable.getFileNum() <= 0) {
+            return result;
+        }
+        try {
+            List<Pair<Long, Long>> sliceRange = new ArrayList<>();
+            long totalRows = catalog.getTotalRows(table.getDbName(), table.getName());

Review Comment:
   Is this `getTotalRows()` method expensive?



##########
be/src/vec/exec/jni_connector.cpp:
##########
@@ -63,14 +63,15 @@ JniConnector::~JniConnector() {
 }
 
 Status JniConnector::open(RuntimeState* state, RuntimeProfile* profile) {
-    RETURN_IF_ERROR(JniUtil::GetJNIEnv(&_env));
-    if (_env == nullptr) {
+    JNIEnv* env = nullptr;
+    RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env));
+    if (env == nullptr) {
         return Status::InternalError("Failed to get/create JVM");
     }
-    RETURN_IF_ERROR(_init_jni_scanner(_env, state->batch_size()));
+    RETURN_IF_ERROR(_init_jni_scanner(env, state->batch_size()));
     // Call org.apache.doris.jni.JniScanner#open
-    _env->CallVoidMethod(_jni_scanner_obj, _jni_scanner_open);
-    RETURN_ERROR_IF_EXC(_env);
+    env->CallVoidMethod(_jni_scanner_obj, _jni_scanner_open);
+    RETURN_ERROR_IF_EXC(env);

Review Comment:
   Why using a local env here? And do we need to release this env?
   Add some comment in code.



##########
fe/fe-core/src/main/java/org/apache/doris/planner/external/MaxComputeScanNode.java:
##########
@@ -74,7 +78,37 @@ protected Map<String, String> getLocationProperties() throws UserException {
     @Override
     protected List<Split> getSplits() throws UserException {
         List<Split> result = new ArrayList<>();
-        result.add(new FileSplit(new Path("/"), 0, -1, -1, 0L, new String[0], Collections.emptyList()));
+        // String splitPath = catalog.getTunnelUrl();
+        // TODO: use single max compute scan node rather than file scan node
+        com.aliyun.odps.Table odpsTable = table.getOdpsTable();
+        if (desc.getSlots().isEmpty() || odpsTable.getFileNum() <= 0) {
+            return result;
+        }
+        try {
+            List<Pair<Long, Long>> sliceRange = new ArrayList<>();
+            long totalRows = catalog.getTotalRows(table.getDbName(), table.getName());
+            long fileNum = odpsTable.getFileNum();
+            long splitSize = (long) Math.ceil((double) totalRows / fileNum);

Review Comment:
   And what if there is only 100 rows, and 10 files. Do we need to split into 10 ranges? I think this is too small.



##########
fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java:
##########
@@ -193,7 +193,10 @@ private void setColumnPositionMapping()
             throws UserException {
         TableIf tbl = getTargetTable();
         List<Integer> columnIdxs = Lists.newArrayList();
-
+        if (params.getRequiredSlots() == null) {

Review Comment:
   What is this for？ Add some comment to explain it.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org