You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by mb...@apache.org on 2020/05/14 20:26:09 UTC
[asterixdb] 16/26: [NO ISSUE][RT] Collect Tuple Stats in External Scan
This is an automated email from the ASF dual-hosted git repository.
mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 1113903ec39cfa267b824550aafd2621e5cdb9fe
Author: Murtadha Hubail <mh...@apache.org>
AuthorDate: Fri May 8 05:03:25 2020 +0300
[NO ISSUE][RT] Collect Tuple Stats in External Scan
- user model changes: no
- storage format changes: no
- interface changes: yes
Details:
- Report number of processed tuples in external scan operator.
- Add test case.
Change-Id: I5dda25f1fc53581dcc5663f2516e79b9b66fd0a5
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/6224
Reviewed-by: Murtadha Hubail <mh...@apache.org>
Reviewed-by: Ali Alsuliman <al...@gmail.com>
Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
---
.../src/test/resources/runtimets/metrics.xml | 5 +++++
.../external-dataset/external-dataset.1.ddl.sqlpp} | 18 +++++++++---------
.../external-dataset/external-dataset.2.metrics.sqlpp} | 17 ++++++-----------
.../external-dataset/external-datasett.3.ddl.sqlpp} | 17 +++++------------
.../external-dataset/external-dataset.2.regexadm | 1 +
.../asterix/common/external/IDataSourceAdapter.java | 7 +++++++
.../asterix/external/api/IDataFlowController.java | 7 +++++++
.../external/dataflow/AbstractDataFlowController.java | 6 ++++++
.../external/dataflow/RecordDataFlowController.java | 2 ++
.../external/dataflow/StreamDataFlowController.java | 2 ++
.../asterix/external/dataset/adapter/FeedAdapter.java | 5 +++++
.../external/dataset/adapter/GenericAdapter.java | 5 +++++
.../operators/ExternalScanOperatorDescriptor.java | 11 ++++++++++-
13 files changed, 70 insertions(+), 33 deletions(-)
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/metrics.xml b/asterixdb/asterix-app/src/test/resources/runtimets/metrics.xml
index da7ba31..a2c3ae5 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/metrics.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/metrics.xml
@@ -40,5 +40,10 @@
<output-dir compare="Text">secondary-index-index-only</output-dir>
</compilation-unit>
</test-case>
+ <test-case FilePath="metrics">
+ <compilation-unit name="external-dataset">
+ <output-dir compare="Text">external-dataset</output-dir>
+ </compilation-unit>
+ </test-case>
</test-group>
</test-suite>
\ No newline at end of file
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/external-dataset/external-dataset.1.ddl.sqlpp
similarity index 67%
copy from asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java
copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/external-dataset/external-dataset.1.ddl.sqlpp
index a324496..a105dfe 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/external-dataset/external-dataset.1.ddl.sqlpp
@@ -16,16 +16,16 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.external.dataflow;
+/*
+ * Description : Processed objects metrics on external dataset
+ * Expected Res : Success
+ */
-import org.apache.asterix.external.api.IDataFlowController;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
-public abstract class AbstractDataFlowController implements IDataFlowController {
+USE test;
- protected final IHyracksTaskContext ctx;
+CREATE TYPE t1 AS {f1: string, f2: string, f3: string, f4: string, f5: string};
- public AbstractDataFlowController(IHyracksTaskContext ctx) {
- this.ctx = ctx;
- }
-}
+CREATE EXTERNAL DATASET ds1(t1) USING localfs(("path"="asterix_nc1://data/csv/sample_09.csv"), ("format"="CSV"), ("header"="FALSE"));
\ No newline at end of file
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/external-dataset/external-dataset.2.metrics.sqlpp
similarity index 67%
copy from asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java
copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/external-dataset/external-dataset.2.metrics.sqlpp
index a324496..e57e938 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/external-dataset/external-dataset.2.metrics.sqlpp
@@ -16,16 +16,11 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.external.dataflow;
-
-import org.apache.asterix.external.api.IDataFlowController;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-public abstract class AbstractDataFlowController implements IDataFlowController {
+/*
+ * Description : Processed objects metrics on external dataset
+ * Expected Res : Success
+ */
- protected final IHyracksTaskContext ctx;
+USE test;
- public AbstractDataFlowController(IHyracksTaskContext ctx) {
- this.ctx = ctx;
- }
-}
+SELECT COUNT(*) from ds1;
\ No newline at end of file
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/external-dataset/external-datasett.3.ddl.sqlpp
similarity index 67%
copy from asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java
copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/external-dataset/external-datasett.3.ddl.sqlpp
index a324496..0bf95e4 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/external-dataset/external-datasett.3.ddl.sqlpp
@@ -16,16 +16,9 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.external.dataflow;
-
-import org.apache.asterix.external.api.IDataFlowController;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-public abstract class AbstractDataFlowController implements IDataFlowController {
-
- protected final IHyracksTaskContext ctx;
+/*
+ * Description : Processed objects metrics on external dataset
+ * Expected Res : Success
+ */
- public AbstractDataFlowController(IHyracksTaskContext ctx) {
- this.ctx = ctx;
- }
-}
+DROP DATAVERSE test;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/metrics/external-dataset/external-dataset.2.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/metrics/external-dataset/external-dataset.2.regexadm
new file mode 100644
index 0000000..ae84a71
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/metrics/external-dataset/external-dataset.2.regexadm
@@ -0,0 +1 @@
+.*"processedObjects":15.*
\ No newline at end of file
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IDataSourceAdapter.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IDataSourceAdapter.java
index 18f59f2..8fc70b8 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IDataSourceAdapter.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IDataSourceAdapter.java
@@ -49,4 +49,11 @@ public interface IDataSourceAdapter {
* @throws Exception
*/
public void start(int partition, IFrameWriter writer) throws HyracksDataException, InterruptedException;
+
+ /**
+ * @return The number of processed tuples by this adapter
+ */
+ default long getProcessedTuples() {
+ throw new UnsupportedOperationException();
+ }
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataFlowController.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataFlowController.java
index f59b82e..ccc420b 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataFlowController.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataFlowController.java
@@ -43,4 +43,11 @@ public interface IDataFlowController {
public default boolean stop(long timeout) throws HyracksDataException {
throw new RuntimeDataException(ErrorCode.OPERATION_NOT_SUPPORTED);
}
+
+ /**
+ * @return The number of processed tuples by this controller
+ */
+ default long getProcessedTuples() {
+ throw new UnsupportedOperationException();
+ }
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java
index a324496..95024e1 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java
@@ -24,8 +24,14 @@ import org.apache.hyracks.api.context.IHyracksTaskContext;
public abstract class AbstractDataFlowController implements IDataFlowController {
protected final IHyracksTaskContext ctx;
+ protected long processedTuples = 0;
public AbstractDataFlowController(IHyracksTaskContext ctx) {
this.ctx = ctx;
}
+
+ @Override
+ public long getProcessedTuples() {
+ return processedTuples;
+ }
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/RecordDataFlowController.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/RecordDataFlowController.java
index 34379e9..2c19f9d 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/RecordDataFlowController.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/RecordDataFlowController.java
@@ -44,6 +44,7 @@ public class RecordDataFlowController<T> extends AbstractDataFlowController {
@Override
public void start(IFrameWriter writer) throws HyracksDataException {
try {
+ processedTuples = 0;
ArrayTupleBuilder tb = new ArrayTupleBuilder(numOfTupleFields);
TupleForwarder tupleForwarder = new TupleForwarder(ctx, writer);
while (recordReader.hasNext()) {
@@ -53,6 +54,7 @@ public class RecordDataFlowController<T> extends AbstractDataFlowController {
tb.addFieldEndOffset();
appendOtherTupleFields(tb);
tupleForwarder.addTuple(tb);
+ processedTuples++;
}
}
tupleForwarder.complete();
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/StreamDataFlowController.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/StreamDataFlowController.java
index 8275953..9c11c97 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/StreamDataFlowController.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/StreamDataFlowController.java
@@ -35,6 +35,7 @@ public class StreamDataFlowController extends AbstractDataFlowController {
@Override
public void start(IFrameWriter writer) throws HyracksDataException {
try {
+ processedTuples = 0;
ArrayTupleBuilder tb = new ArrayTupleBuilder(1);
TupleForwarder tupleForwarder = new TupleForwarder(ctx, writer);
while (true) {
@@ -44,6 +45,7 @@ public class StreamDataFlowController extends AbstractDataFlowController {
}
tb.addFieldEndOffset();
tupleForwarder.addTuple(tb);
+ processedTuples++;
}
tupleForwarder.complete();
} catch (Exception e) {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/FeedAdapter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/FeedAdapter.java
index 0ab59fe..123a552 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/FeedAdapter.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/FeedAdapter.java
@@ -38,6 +38,11 @@ public class FeedAdapter implements IDataSourceAdapter, Closeable {
controller.start(writer);
}
+ @Override
+ public long getProcessedTuples() {
+ return controller.getProcessedTuples();
+ }
+
public boolean stop(long timeout) throws HyracksDataException {
return controller.stop(timeout);
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/GenericAdapter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/GenericAdapter.java
index 0904384..17a134b 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/GenericAdapter.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/GenericAdapter.java
@@ -35,4 +35,9 @@ public class GenericAdapter implements IDataSourceAdapter {
public void start(int partition, IFrameWriter writer) throws HyracksDataException, InterruptedException {
controller.start(writer);
}
+
+ @Override
+ public long getProcessedTuples() {
+ return controller.getProcessedTuples();
+ }
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalScanOperatorDescriptor.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalScanOperatorDescriptor.java
index 4fd5151..1d7623d 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalScanOperatorDescriptor.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalScanOperatorDescriptor.java
@@ -26,6 +26,7 @@ import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.job.JobSpecification;
+import org.apache.hyracks.api.job.profiling.IOperatorStats;
import org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
import org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable;
@@ -53,13 +54,21 @@ public class ExternalScanOperatorDescriptor extends AbstractSingleActivityOperat
return new AbstractUnaryOutputSourceOperatorNodePushable() {
+ private IOperatorStats stats;
+
@Override
public void initialize() throws HyracksDataException {
- IDataSourceAdapter adapter = null;
+ IDataSourceAdapter adapter;
+ if (ctx.getStatsCollector() != null) {
+ stats = ctx.getStatsCollector().getOrAddOperatorStats(getDisplayName());
+ }
try {
writer.open();
adapter = adapterFactory.createAdapter(ctx, partition);
adapter.start(partition, writer);
+ if (stats != null) {
+ stats.getTupleCounter().update(adapter.getProcessedTuples());
+ }
} catch (Exception e) {
writer.fail();
throw HyracksDataException.create(e);