in python/tvm/relay/op/strategy/arm_cpu.py [0:0]
def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
"""conv2d arm cpu strategy"""
strategy = _op.OpStrategy()
data, kernel = inputs
dilation_h, dilation_w = attrs.get_int_tuple("dilation")
stride_h, stride_w = attrs.get_int_tuple("strides")
padding = attrs.get_int_tuple("padding")
groups = attrs.groups
layout = attrs.data_layout
kernel_layout = attrs.kernel_layout
if dilation_h < 1 or dilation_w < 1:
raise ValueError("dilation should be positive value")
isa = arm_isa.IsaAnalyzer(target)
if groups == 1:
if layout == "NCHW":
if kernel_layout == "OIHW":
# ARM conv2d spatial pack schedule.
strategy.add_implementation(
wrap_compute_conv2d(topi.arm_cpu.conv2d_nchw_spatial_pack),
wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_nchw_spatial_pack),
name="conv2d_nchw_spatial_pack.arm_cpu",
)
# Intel x86 conv2d schedule.
strategy.add_implementation(
wrap_compute_conv2d(topi.x86.conv2d_nchw),
wrap_topi_schedule(topi.x86.schedule_conv2d_nchw),
name="conv2d_nchw.x86",
)
# check if winograd algorithm is applicable
_, _, kh, kw = get_const_tuple(kernel.shape)
pt, pl, pb, pr = topi.nn.get_pad_tuple(padding, (kh, kw))
is_winograd_applicable = (
"float" in data.dtype
and "float" in kernel.dtype
and kh == 3
and kw == 3
and stride_h == 1
and stride_w == 1
and dilation_h == 1
and dilation_w == 1
)
if is_winograd_applicable:
strategy.add_implementation(
wrap_compute_conv2d(topi.arm_cpu.conv2d_nchw_winograd),
wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_nchw_winograd),
name="conv2d_nchw_winograd.arm_cpu",
plevel=5,
)
if "nnpack" in target.libs and pt == 1 and pb == 1 and pl == 1 and pr == 1:
strategy.add_implementation(
wrap_compute_conv2d(topi.arm_cpu.conv2d_nchw_winograd_nnpack),
wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_nchw_winograd_nnpack),
name="conv2d_nchw_winograd_nnpack.arm_cpu",
plevel=15,
)
elif re.match(r"OIHW\d*o", kernel_layout):
strategy.add_implementation(
wrap_compute_conv2d(topi.arm_cpu.conv2d_nchw_spatial_pack),
wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_nchw_spatial_pack),
name="conv2d_nchw_spatial_pack.arm_cpu",
)
else:
raise RuntimeError(
"Unsupported weight layout {} for conv2d NCHW".format(kernel_layout)
)
elif layout == "HWCN":
assert kernel_layout == "HWIO"
logger.warning("conv2d_hwcn is not optimized for arm cpu.")
strategy.add_implementation(
wrap_compute_conv2d(topi.nn.conv2d_hwcn),
wrap_topi_schedule(topi.generic.schedule_conv2d_hwcn),
name="conv2d_hwcn.generic",
)
elif layout == "NHWC":
channels = data.shape[3]
if "SMLAD" in isa and (channels % 4) == 0 and kernel_layout == "HWOI":
strategy.add_implementation(
wrap_compute_conv2d(topi.arm_cpu.conv2d_direct_simd),
wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_direct_simd),
name="conv2d_direct_simd.micro_dev",
)
elif kernel_layout == "HWIO":
is_aarch64 = topi.arm_cpu.arm_utils.is_aarch64_arm()
has_dot_prod = topi.arm_cpu.arm_utils.is_dotprod_available()
if has_dot_prod and data.dtype in ["int8", "uint8"]:
strategy.add_implementation(
wrap_compute_conv2d(topi.arm_cpu.compute_conv2d_NHWC_quantized_native),
wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_NHWC_quantized_native),
name="conv2d_NHWC_quantized_native.arm_cpu",
)
if is_aarch64 and data.dtype in ["int8", "uint8"]:
strategy.add_implementation(
wrap_compute_conv2d(topi.arm_cpu.compute_conv2d_NHWC_quantized_interleaved),
wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_NHWC_quantized_interleaved),
name="conv2d_NHWC_quantized_interleaved.arm_cpu",
)
if (not is_aarch64) or (data.dtype not in ["int8", "uint8"]):
# TODO(@giuseros)
# This strategy errors out for quantized data types when tuning.
# Let's use this only for non-aarch64 or non-quantized cases
strategy.add_implementation(
wrap_compute_conv2d(topi.arm_cpu.conv2d_nhwc_spatial_pack),
wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_nhwc_spatial_pack),
name="conv2d_nhwc_spatial_pack.arm_cpu",
)
else:
raise RuntimeError(
"Unsupported kernel layout {} for conv2d NHWC".format(kernel_layout)
)
else:
raise RuntimeError("Unsupported conv2d layout {} for arm cpu".format(layout))
elif is_depthwise_conv2d(data.shape, layout, kernel.shape, kernel_layout, groups):
if layout == "NCHW":
assert kernel_layout == "OIHW" or re.match(r"OIHW\d*o", kernel_layout)
# ARM conv2d depthwise schedule
if kernel_layout == "OIHW":
strategy.add_implementation(
wrap_compute_conv2d(topi.arm_cpu.depthwise_conv2d_nchw),
wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nchw),
name="depthwise_conv2d_nchw.arm_cpu",
)
# TODO:
# This schedule has incorrect result on some hardware platforms (like NV Jetson TX2)
# Let us comment it out but not remove.
# see discussion:
# https://discuss.tvm.apache.org/t/autotuner-incorrect-result-after-tuning-mobilenetv2-on-arm-cpu/6088
# strategy.add_implementation(
# wrap_compute_conv2d(topi.arm_cpu.depthwise_conv2d_nchw_spatial_pack),
# wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nchw_spatial_pack),
# name="depthwise_conv2d_nchw_spatial_pack.arm_cpu",
# plevel=15)
# Intel x86 depthwise conv2d schedule.
channel_multiplier = get_const_tuple(inputs[1].shape)[1]
if channel_multiplier == 1 and dilation_h == 1 and dilation_w == 1:
strategy.add_implementation(
wrap_compute_conv2d(topi.x86.depthwise_conv2d_nchw),
wrap_topi_schedule(topi.x86.schedule_depthwise_conv2d_nchw),
name="depthwise_conv2d_nchw.x86",
)
elif layout == "NHWC":
assert kernel_layout == "HWOI"
strategy.add_implementation(
wrap_compute_conv2d(topi.arm_cpu.compute_depthwise_conv2d_nhwc),
wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nhwc),
name="depthwise_conv2d_nhwc.arm_cpu",
)
else:
raise RuntimeError("Unsupported depthwise_conv2d layout {} for arm cpu".format(layout))
else: # group_conv2d
if layout == "NCHW":
assert kernel_layout == "OIHW"
strategy.add_implementation(
wrap_compute_conv2d(topi.arm_cpu.group_conv2d_nchw, has_groups=True),
wrap_topi_schedule(topi.arm_cpu.schedule_group_conv2d_nchw),
name="group_conv2d_nchw.arm_cpu",
)
elif layout == "NHWC":
assert kernel_layout == "HWIO"
logger.warning("group_conv2d with layout NHWC is not optimized for arm cpu.")
strategy.add_implementation(
wrap_compute_conv2d(topi.nn.group_conv2d_nhwc, has_groups=True),
wrap_topi_schedule(topi.generic.schedule_group_conv2d_nhwc),
name="group_conv2d_nhwc.generic",
)
else:
raise RuntimeError("Unsupported group_conv2d layout {} for arm cpu".format(layout))
return strategy