tools/amd_build/pyHIPIFY/hipify_python.py (298 lines of code) (raw):
#!/usr/bin/env python3
""" The Python Hipify script.
##
# Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
# 2017-2018 Advanced Micro Devices, Inc. and
# Facebook Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
"""
from __future__ import absolute_import, division, print_function
import fnmatch
import os
import re
from pyHIPIFY.cuda_to_hip_mappings import CUDA_TO_HIP_MAPPINGS
class InputError(Exception):
# Exception raised for errors in the input.
def __init__(self, message):
super(InputError, self).__init__(message)
self.message = message
def __str__(self):
return "{}: {}".format("Input error", self.message)
def matched_files_iter(root_path, includes=("*",), ignores=(), extensions=()):
def _fnmatch(filepath, patterns):
return any(fnmatch.fnmatch(filepath, pattern) for pattern in patterns)
def match_extensions(filename):
"""Helper method to see if filename ends with certain extension"""
return any(filename.endswith(e) for e in extensions)
exact_matches = set(includes)
# This is a very rough heuristic; really, we want to avoid scanning
# any file which is not checked into source control, but this script
# needs to work even if you're in a Git or Hg checkout, so easier to
# just blacklist the biggest time sinks that won't matter in the
# end.
for (abs_dirpath, dirs, filenames) in os.walk(root_path, topdown=True):
rel_dirpath = os.path.relpath(abs_dirpath, root_path)
if rel_dirpath == ".":
# Blah blah blah O(n) blah blah
if ".git" in dirs:
dirs.remove(".git")
if "build" in dirs:
dirs.remove("build")
if "third_party" in dirs:
dirs.remove("third_party")
for filename in filenames:
filepath = os.path.join(rel_dirpath, filename)
# We respect extensions, UNLESS you wrote the entire
# filename verbatim, in which case we always accept it
if (
_fnmatch(filepath, includes)
and (not _fnmatch(filepath, ignores))
and (match_extensions(filepath) or filepath in exact_matches)
):
yield filepath
def preprocess(project_directory, output_directory, all_files, show_progress=True):
"""
Call preprocessor on selected files.
"""
for filepath in all_files:
preprocessor(project_directory, output_directory, filepath)
if show_progress:
print(filepath, "->", get_hip_file_path(filepath))
print("Successfully preprocessed all matching files.")
def add_dim3(kernel_string, cuda_kernel):
"""adds dim3() to the second and third arguments in the kernel launch"""
count = 0
closure = 0
kernel_string = kernel_string.replace("<<<", "").replace(">>>", "")
arg_locs = [{} for _ in range(2)]
arg_locs[count]["start"] = 0
for ind, c in enumerate(kernel_string):
if count > 1:
break
if c == "(":
closure += 1
elif c == ")":
closure -= 1
elif (c == "," or ind == len(kernel_string) - 1) and closure == 0:
arg_locs[count]["end"] = ind + (c != ",")
count += 1
if count < 2:
arg_locs[count]["start"] = ind + 1
first_arg_raw = kernel_string[arg_locs[0]["start"] : arg_locs[0]["end"] + 1]
second_arg_raw = kernel_string[arg_locs[1]["start"] : arg_locs[1]["end"]]
first_arg_clean = (
kernel_string[arg_locs[0]["start"] : arg_locs[0]["end"]]
.replace("\n", "")
.strip(" ")
)
second_arg_clean = (
kernel_string[arg_locs[1]["start"] : arg_locs[1]["end"]]
.replace("\n", "")
.strip(" ")
)
first_arg_dim3 = "dim3({})".format(first_arg_clean)
second_arg_dim3 = "dim3({})".format(second_arg_clean)
first_arg_raw_dim3 = first_arg_raw.replace(first_arg_clean, first_arg_dim3)
second_arg_raw_dim3 = second_arg_raw.replace(second_arg_clean, second_arg_dim3)
cuda_kernel = cuda_kernel.replace(
first_arg_raw + second_arg_raw, first_arg_raw_dim3 + second_arg_raw_dim3
)
return cuda_kernel
RE_KERNEL_LAUNCH = re.compile(r"([ ]+)(detail?)::[ ]+\\\n[ ]+")
def processKernelLaunches(string):
"""Replace the CUDA style Kernel launches with the HIP style kernel launches."""
# Concat the namespace with the kernel names. (Find cleaner way of doing this later).
string = RE_KERNEL_LAUNCH.sub(
lambda inp: "{0}{1}::".format(inp.group(1), inp.group(2)), string
)
def grab_method_and_template(in_kernel):
# The positions for relevant kernel components.
pos = {
"kernel_launch": {"start": in_kernel["start"], "end": in_kernel["end"]},
"kernel_name": {"start": -1, "end": -1},
"template": {"start": -1, "end": -1},
}
# Count for balancing template
count = {"<>": 0}
# Status for whether we are parsing a certain item.
START = 0
AT_TEMPLATE = 1
AFTER_TEMPLATE = 2
AT_KERNEL_NAME = 3
status = START
# Parse the string character by character
for i in range(pos["kernel_launch"]["start"] - 1, -1, -1):
char = string[i]
# Handle Templating Arguments
if status == START or status == AT_TEMPLATE:
if char == ">":
if status == START:
status = AT_TEMPLATE
pos["template"]["end"] = i
count["<>"] += 1
if char == "<":
count["<>"] -= 1
if count["<>"] == 0 and (status == AT_TEMPLATE):
pos["template"]["start"] = i
status = AFTER_TEMPLATE
# Handle Kernel Name
if status != AT_TEMPLATE:
if string[i].isalnum() or string[i] in {"(", ")", "_", ":", "#"}:
if status != AT_KERNEL_NAME:
status = AT_KERNEL_NAME
pos["kernel_name"]["end"] = i
# Case: Kernel name starts the string.
if i == 0:
pos["kernel_name"]["start"] = 0
# Finished
return [
(pos["kernel_name"]),
(pos["template"]),
(pos["kernel_launch"]),
]
else:
# Potential ending point if we're already traversing a kernel's name.
if status == AT_KERNEL_NAME:
pos["kernel_name"]["start"] = i
# Finished
return [
(pos["kernel_name"]),
(pos["template"]),
(pos["kernel_launch"]),
]
def find_kernel_bounds(string):
"""Finds the starting and ending points for all kernel launches in the string."""
kernel_end = 0
kernel_positions = []
# Continue until we cannot find any more kernels anymore.
while string.find("<<<", kernel_end) != -1:
# Get kernel starting position (starting from the previous ending point)
kernel_start = string.find("<<<", kernel_end)
# Get kernel ending position (adjust end point past the >>>)
kernel_end = string.find(">>>", kernel_start) + 3
if kernel_end <= 0:
raise InputError("no kernel end found")
# Add to list of traversed kernels
kernel_positions.append(
{
"start": kernel_start,
"end": kernel_end,
"group": string[kernel_start:kernel_end],
}
)
return kernel_positions
# Grab positional ranges of all kernel launchces
get_kernel_positions = [k for k in find_kernel_bounds(string)]
output_string = string
# Replace each CUDA kernel with a HIP kernel.
for kernel in get_kernel_positions:
# Get kernel components
params = grab_method_and_template(kernel)
# Find parenthesis after kernel launch
parenthesis = string.find("(", kernel["end"])
# Extract cuda kernel
cuda_kernel = string[params[0]["start"] : parenthesis + 1]
kernel_string = string[kernel["start"] : kernel["end"]]
cuda_kernel_dim3 = add_dim3(kernel_string, cuda_kernel)
# Keep number of kernel launch params consistent (grid dims, group dims, stream, dynamic shared size)
num_klp = len(
extract_arguments(
0, kernel["group"].replace("<<<", "(").replace(">>>", ")")
)
)
hip_kernel = "hipLaunchKernelGGL(" + cuda_kernel_dim3[0:-1].replace(
">>>", ", 0" * (4 - num_klp) + ">>>"
).replace("<<<", ", ").replace(">>>", ", ")
# Replace cuda kernel with hip kernel
output_string = output_string.replace(cuda_kernel, hip_kernel)
return output_string
def get_hip_file_path(filepath):
"""
Returns the new name of the hipified file
"""
dirpath, filename = os.path.split(filepath)
root, ext = os.path.splitext(filename)
# Concretely, we do the following:
#
# - If there is a directory component named "cuda", replace
# it with "hip", AND
#
# - If the file name contains "CUDA", replace it with "HIP", AND
# Furthermore, ALWAYS replace '.cu' with '.hip', because those files
# contain CUDA kernels that needs to be hipified and processed with
# hcc compiler
#
# This isn't set in stone; we might adjust this to support other
# naming conventions.
if ext == ".cu":
ext = ".hip"
orig_dirpath = dirpath
dirpath = dirpath.replace("cuda", "hip")
root = root.replace("cuda", "hip")
root = root.replace("CUDA", "HIP")
return os.path.join(dirpath, root + ext)
# Cribbed from https://stackoverflow.com/questions/42742810/speed-up-millions-of-regex-replacements-in-python-3/42789508#42789508
class Trie:
"""Regex::Trie in Python. Creates a Trie out of a list of words. The trie can be exported to a Regex pattern.
The corresponding Regex should match much faster than a simple Regex union."""
def __init__(self):
self.data = {}
def add(self, word):
ref = self.data
for char in word:
ref[char] = char in ref and ref[char] or {}
ref = ref[char]
ref[""] = 1
def dump(self):
return self.data
def quote(self, char):
return re.escape(char)
def _pattern(self, pData):
data = pData
if "" in data and len(data.keys()) == 1:
return None
alt = []
cc = []
q = 0
for char in sorted(data.keys()):
if isinstance(data[char], dict):
try:
recurse = self._pattern(data[char])
alt.append(self.quote(char) + recurse)
except Exception:
cc.append(self.quote(char))
else:
q = 1
cconly = not len(alt) > 0
if len(cc) > 0:
if len(cc) == 1:
alt.append(cc[0])
else:
alt.append("[" + "".join(cc) + "]")
if len(alt) == 1:
result = alt[0]
else:
result = "(?:" + "|".join(alt) + ")"
if q:
if cconly:
result += "?"
else:
result = "(?:%s)?" % result
return result
def pattern(self):
return self._pattern(self.dump())
RE_TRIE = Trie()
RE_MAP = {}
for mapping in CUDA_TO_HIP_MAPPINGS:
for src, value in mapping.items():
dst = value[0]
RE_TRIE.add(src)
RE_MAP[src] = dst
RE_PREPROCESSOR = re.compile(RE_TRIE.pattern())
def re_replace(input_string):
def sub_repl(m):
return RE_MAP[m.group(0)]
return RE_PREPROCESSOR.sub(sub_repl, input_string)
def preprocessor(project_directory, output_directory, filepath):
"""Executes the CUDA -> HIP conversion on the specified file."""
fin_path = os.path.join(project_directory, filepath)
with open(fin_path, "r") as fin:
output_source = fin.read()
fout_path = os.path.join(output_directory, get_hip_file_path(filepath))
assert os.path.join(output_directory, fout_path) != os.path.join(
project_directory, fin_path
)
if not os.path.exists(os.path.dirname(fout_path)):
os.makedirs(os.path.dirname(fout_path))
with open(fout_path, "w") as fout:
output_source = re_replace(output_source)
# Perform Kernel Launch Replacements
output_source = processKernelLaunches(output_source)
fout.write(output_source)
def extract_arguments(start, string):
"""Return the list of arguments in the upcoming function parameter closure.
Example:
string (input): '(blocks, threads, 0, THCState_getCurrentStream(state))'
arguments (output):
'[{'start': 1, 'end': 7},
{'start': 8, 'end': 16},
{'start': 17, 'end': 19},
{'start': 20, 'end': 53}]'
"""
arguments = []
closures = {"<": 0, "(": 0}
current_position = start
argument_start_pos = current_position + 1
# Search for final parenthesis
while current_position < len(string):
if string[current_position] == "(":
closures["("] += 1
elif string[current_position] == ")":
closures["("] -= 1
elif string[current_position] == "<":
closures["<"] += 1
elif (
string[current_position] == ">"
and string[current_position - 1] != "-"
and closures["<"] > 0
):
closures["<"] -= 1
# Finished all arguments
if closures["("] == 0 and closures["<"] == 0:
# Add final argument
arguments.append({"start": argument_start_pos, "end": current_position})
break
# Finished current argument
if (
closures["("] == 1
and closures["<"] == 0
and string[current_position] == ","
):
arguments.append({"start": argument_start_pos, "end": current_position})
argument_start_pos = current_position + 1
current_position += 1
return arguments
def hipify(
project_directory,
extensions=(".cu", ".cuh", ".c", ".cc", ".cpp", ".h", ".in", ".hpp"),
output_directory=None,
includes=(),
ignores=(),
list_files_only=False,
show_progress=True,
):
assert os.path.exists(project_directory)
# If no output directory, provide a default one.
if not output_directory:
output_directory = os.path.join(project_directory, "hip")
all_files = list(
matched_files_iter(
project_directory, includes=includes, ignores=ignores, extensions=extensions
)
)
if list_files_only:
print(os.linesep.join(all_files))
return
# Start Preprocessor
preprocess(
project_directory, output_directory, all_files, show_progress=show_progress
)