Posted to commits@tvm.apache.org by "neildhickey (via GitHub)" <gi...@apache.org> on 2023/06/06 12:04:15 UTC

[GitHub] [tvm] neildhickey commented on a diff in pull request #14855: [topi] Add `arm_cpu` specific pooling schedules

neildhickey commented on code in PR #14855:
URL: https://github.com/apache/tvm/pull/14855#discussion_r1219507525


##########
python/tvm/topi/arm_cpu/pooling.py:
##########
@@ -17,9 +17,88 @@
 # pylint: disable=invalid-name, unused-variable
 """Schedule for pooling operators"""
 
-from .mprofile.dsp.pool import pool_dsp_schedule
+import logging
+from tvm import topi, te
+from tvm.target import Target
+from .. import tag
 
 
 def schedule_pool(outs, layout):
-    """Create schedule for avgpool/maxpool with dsp"""
-    return pool_dsp_schedule(outs, layout)
+    """Create schedule for avgpool/maxpool"""
+
+    if layout != "NHWC":
+        logger = logging.getLogger("topi")
+        logger.warning(
+            """We currently only support NHWC target specific pools on arm_cpu,
+               falling back on generic pool scheduling"""
+        )
+        return topi.generic.schedule_pool(outs, layout)
+
+    return schedule_pool_2d(outs)
+
+
+def schedule_pool_2d(outs):
+    """Create arm_cpu specific 2D schedule for avgpool/maxpool"""
+
+    outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+    schedule_ops = [x.op for x in outs]
+    schedule = te.create_schedule(schedule_ops)
+    scheduled_ops = []
+
+    def traverse(op):
+        # Recursively inline any injective operation that isn't the pooling
+        # operation or hasn't slready been scheduled.

Review Comment:
   spelling mistake here, should be "already" not "slready"
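
   For reference, a minimal usage sketch of the dispatcher in this hunk (not part of the PR):
   it builds an NHWC max pool with topi.nn.pool2d and schedules it inside an AArch64 llvm
   target context so that Target.current() is set when schedule_pool runs. The pool2d keyword
   arguments and the topi.arm_cpu.schedule_pool entry point are assumptions that may differ
   between TVM versions.

       import tvm
       from tvm import te, topi

       data = te.placeholder((1, 28, 28, 64), name="data", dtype="float32")
       # NHWC max pool: 2x2 window, stride 2, no dilation, no padding.
       pool = topi.nn.pool2d(
           data,
           kernel=(2, 2),
           stride=(2, 2),
           dilation=(1, 1),
           padding=(0, 0, 0, 0),
           pool_type="max",
           layout="NHWC",
       )

       with tvm.target.Target("llvm -mtriple=aarch64-linux-gnu"):
           # NHWC dispatches to schedule_pool_2d; any other layout falls back to
           # topi.generic.schedule_pool and logs the warning shown above.
           sched = topi.arm_cpu.schedule_pool([pool], layout="NHWC")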



##########
python/tvm/topi/arm_cpu/pooling.py:
##########
@@ -17,9 +17,88 @@
 # pylint: disable=invalid-name, unused-variable
 """Schedule for pooling operators"""
 
-from .mprofile.dsp.pool import pool_dsp_schedule
+import logging
+from tvm import topi, te
+from tvm.target import Target
+from .. import tag
 
 
 def schedule_pool(outs, layout):
-    """Create schedule for avgpool/maxpool with dsp"""
-    return pool_dsp_schedule(outs, layout)
+    """Create schedule for avgpool/maxpool"""
+
+    if layout != "NHWC":
+        logger = logging.getLogger("topi")
+        logger.warning(
+            """We currently only support NHWC target specific pools on arm_cpu,
+               falling back on generic pool scheduling"""
+        )
+        return topi.generic.schedule_pool(outs, layout)
+
+    return schedule_pool_2d(outs)
+
+
+def schedule_pool_2d(outs):
+    """Create arm_cpu specific 2D schedule for avgpool/maxpool"""
+
+    outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+    schedule_ops = [x.op for x in outs]
+    schedule = te.create_schedule(schedule_ops)
+    scheduled_ops = []
+
+    def traverse(op):
+        # Recursively inline any injective operation that isn't the pooling
+        # operation or hasn't slready been scheduled.
+        if tag.is_injective(op.tag):
+            if op not in schedule.outputs:
+                schedule[op].compute_inline()
+            for tensor in op.input_tensors:
+                if isinstance(tensor.op, te.tensor.ComputeOp) and tensor.op not in scheduled_ops:
+                    traverse(tensor.op)
+        # schedule the actual pooling operation
+        elif op.tag.startswith("pool"):
+            n, height, width, channel = schedule[op].op.axis
+            # Average pool consists of two parts; a sum then a division.
+            # We can schedule the division loop to parallelize across height and
+            # vectorize across width.
+            if op != outs[0].op:
+                output = outs[0]
+                output_fused = schedule[output].fuse(output.op.axis[1], output.op.axis[2])
+                schedule[output].parallel(output_fused)
+                _, inner = schedule[output].split(output.op.axis[3], 8)
+                schedule[output].vectorize(inner)
+
+            enable_explicit_vectorization = not Target.current(allow_none=False).features.has_sve
+            PaddedInput = op.input_tensors[0]
+            if isinstance(PaddedInput.op, te.tensor.ComputeOp):
+                schedule[PaddedInput].compute_inline()
+
+            # For targets without SVE try explicitly vectorizing the channel
+            # loop, For SVE targets leave the loop in place for LLVM to convert
+            #  into a scalable vector loop.

Review Comment:
   extra whitespace after #
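
   The hunk above gates explicit vectorization on whether the target reports SVE support.
   Below is a standalone sketch (not the PR code) of that fuse/parallel/split/vectorize
   pattern on a toy NHWC compute: the split factor of 8 mirrors the quoted hunk, while the
   compute, shapes, and target string are illustrative, and the features.has_sve flag assumes
   a TVM build recent enough to expose arm_cpu target features.

       import tvm
       from tvm import te
       from tvm.target import Target

       n, h, w, c = 1, 28, 28, 64
       data = te.placeholder((n, h, w, c), name="data")
       # Toy element-wise stage standing in for the average-pool division step.
       out = te.compute((n, h, w, c), lambda i, y, x, k: data[i, y, x, k] * 0.25, name="out")

       with Target("llvm -mtriple=aarch64-linux-gnu"):
           sched = te.create_schedule(out.op)
           i, y, x, k = sched[out].op.axis
           # Parallelise across the fused height/width loops.
           fused = sched[out].fuse(y, x)
           sched[out].parallel(fused)
           if not Target.current(allow_none=False).features.has_sve:
               # Fixed-width (e.g. NEON) target: vectorize the channel loop explicitly.
               _, inner = sched[out].split(k, factor=8)
               sched[out].vectorize(inner)
           # For SVE targets the channel loop is left untouched so LLVM can turn it
           # into a scalable vector loop.
           print(tvm.lower(sched, [data, out], simple_mode=True))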



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org