def conv2d_strategy_arm

def conv2d_strategy_arm_cpu()

in python/tvm/relay/op/strategy/arm_cpu.py [0:0]
148 lines of code
46 McCabe index (conditional complexity)

def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
    """conv2d arm cpu strategy"""
    strategy = _op.OpStrategy()
    data, kernel = inputs
    dilation_h, dilation_w = attrs.get_int_tuple("dilation")
    stride_h, stride_w = attrs.get_int_tuple("strides")
    padding = attrs.get_int_tuple("padding")
    groups = attrs.groups
    layout = attrs.data_layout
    kernel_layout = attrs.kernel_layout
    if dilation_h < 1 or dilation_w < 1:
        raise ValueError("dilation should be positive value")

    isa = arm_isa.IsaAnalyzer(target)

    if groups == 1:
        if layout == "NCHW":
            if kernel_layout == "OIHW":
                # ARM conv2d spatial pack schedule.
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.arm_cpu.conv2d_nchw_spatial_pack),
                    wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_nchw_spatial_pack),
                    name="conv2d_nchw_spatial_pack.arm_cpu",
                )

                # Intel x86 conv2d schedule.
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.x86.conv2d_nchw),
                    wrap_topi_schedule(topi.x86.schedule_conv2d_nchw),
                    name="conv2d_nchw.x86",
                )

                # check if winograd algorithm is applicable
                _, _, kh, kw = get_const_tuple(kernel.shape)
                pt, pl, pb, pr = topi.nn.get_pad_tuple(padding, (kh, kw))
                is_winograd_applicable = (
                    "float" in data.dtype
                    and "float" in kernel.dtype
                    and kh == 3
                    and kw == 3
                    and stride_h == 1
                    and stride_w == 1
                    and dilation_h == 1
                    and dilation_w == 1
                )
                if is_winograd_applicable:
                    strategy.add_implementation(
                        wrap_compute_conv2d(topi.arm_cpu.conv2d_nchw_winograd),
                        wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_nchw_winograd),
                        name="conv2d_nchw_winograd.arm_cpu",
                        plevel=5,
                    )
                    if "nnpack" in target.libs and pt == 1 and pb == 1 and pl == 1 and pr == 1:
                        strategy.add_implementation(
                            wrap_compute_conv2d(topi.arm_cpu.conv2d_nchw_winograd_nnpack),
                            wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_nchw_winograd_nnpack),
                            name="conv2d_nchw_winograd_nnpack.arm_cpu",
                            plevel=15,
                        )
            elif re.match(r"OIHW\d*o", kernel_layout):
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.arm_cpu.conv2d_nchw_spatial_pack),
                    wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_nchw_spatial_pack),
                    name="conv2d_nchw_spatial_pack.arm_cpu",
                )
            else:
                raise RuntimeError(
                    "Unsupported weight layout {} for conv2d NCHW".format(kernel_layout)
                )
        elif layout == "HWCN":
            assert kernel_layout == "HWIO"
            logger.warning("conv2d_hwcn is not optimized for arm cpu.")
            strategy.add_implementation(
                wrap_compute_conv2d(topi.nn.conv2d_hwcn),
                wrap_topi_schedule(topi.generic.schedule_conv2d_hwcn),
                name="conv2d_hwcn.generic",
            )
        elif layout == "NHWC":
            channels = data.shape[3]
            if "SMLAD" in isa and (channels % 4) == 0 and kernel_layout == "HWOI":
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.arm_cpu.conv2d_direct_simd),
                    wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_direct_simd),
                    name="conv2d_direct_simd.micro_dev",
                )
            elif kernel_layout == "HWIO":
                is_aarch64 = topi.arm_cpu.arm_utils.is_aarch64_arm()
                has_dot_prod = topi.arm_cpu.arm_utils.is_dotprod_available()
                if has_dot_prod and data.dtype in ["int8", "uint8"]:
                    strategy.add_implementation(
                        wrap_compute_conv2d(topi.arm_cpu.compute_conv2d_NHWC_quantized_native),
                        wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_NHWC_quantized_native),
                        name="conv2d_NHWC_quantized_native.arm_cpu",
                    )
                if is_aarch64 and data.dtype in ["int8", "uint8"]:
                    strategy.add_implementation(
                        wrap_compute_conv2d(topi.arm_cpu.compute_conv2d_NHWC_quantized_interleaved),
                        wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_NHWC_quantized_interleaved),
                        name="conv2d_NHWC_quantized_interleaved.arm_cpu",
                    )
                if (not is_aarch64) or (data.dtype not in ["int8", "uint8"]):
                    # TODO(@giuseros)
                    # This strategy errors out for quantized data types when tuning.
                    # Let's use this only for non-aarch64 or non-quantized cases
                    strategy.add_implementation(
                        wrap_compute_conv2d(topi.arm_cpu.conv2d_nhwc_spatial_pack),
                        wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_nhwc_spatial_pack),
                        name="conv2d_nhwc_spatial_pack.arm_cpu",
                    )
            else:
                raise RuntimeError(
                    "Unsupported kernel layout {} for conv2d NHWC".format(kernel_layout)
                )

        else:
            raise RuntimeError("Unsupported conv2d layout {} for arm cpu".format(layout))
    elif is_depthwise_conv2d(data.shape, layout, kernel.shape, kernel_layout, groups):
        if layout == "NCHW":
            assert kernel_layout == "OIHW" or re.match(r"OIHW\d*o", kernel_layout)
            # ARM conv2d depthwise schedule
            if kernel_layout == "OIHW":
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.arm_cpu.depthwise_conv2d_nchw),
                    wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nchw),
                    name="depthwise_conv2d_nchw.arm_cpu",
                )

            # TODO:
            # This schedule has incorrect result on some hardware platforms (like NV Jetson TX2)
            # Let us comment it out but not remove.
            # see discussion:
            # https://discuss.tvm.apache.org/t/autotuner-incorrect-result-after-tuning-mobilenetv2-on-arm-cpu/6088
            # strategy.add_implementation(
            #     wrap_compute_conv2d(topi.arm_cpu.depthwise_conv2d_nchw_spatial_pack),
            #     wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nchw_spatial_pack),
            #     name="depthwise_conv2d_nchw_spatial_pack.arm_cpu",
            #     plevel=15)

            # Intel x86 depthwise conv2d schedule.
            channel_multiplier = get_const_tuple(inputs[1].shape)[1]
            if channel_multiplier == 1 and dilation_h == 1 and dilation_w == 1:
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.x86.depthwise_conv2d_nchw),
                    wrap_topi_schedule(topi.x86.schedule_depthwise_conv2d_nchw),
                    name="depthwise_conv2d_nchw.x86",
                )
        elif layout == "NHWC":
            assert kernel_layout == "HWOI"
            strategy.add_implementation(
                wrap_compute_conv2d(topi.arm_cpu.compute_depthwise_conv2d_nhwc),
                wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nhwc),
                name="depthwise_conv2d_nhwc.arm_cpu",
            )
        else:
            raise RuntimeError("Unsupported depthwise_conv2d layout {} for arm cpu".format(layout))
    else:  # group_conv2d
        if layout == "NCHW":
            assert kernel_layout == "OIHW"
            strategy.add_implementation(
                wrap_compute_conv2d(topi.arm_cpu.group_conv2d_nchw, has_groups=True),
                wrap_topi_schedule(topi.arm_cpu.schedule_group_conv2d_nchw),
                name="group_conv2d_nchw.arm_cpu",
            )
        elif layout == "NHWC":
            assert kernel_layout == "HWIO"
            logger.warning("group_conv2d with layout NHWC is not optimized for arm cpu.")
            strategy.add_implementation(
                wrap_compute_conv2d(topi.nn.group_conv2d_nhwc, has_groups=True),
                wrap_topi_schedule(topi.generic.schedule_group_conv2d_nhwc),
                name="group_conv2d_nhwc.generic",
            )
        else:
            raise RuntimeError("Unsupported group_conv2d layout {} for arm cpu".format(layout))
    return strategy