Status getTextSizeInBytes()

in compiler_gym/envs/llvm/service/Cost.cc [47:166]


// Compute the size of the .text section that `module` lowers to.
//
// The module is printed to textual IR, piped to clang on stdin to produce an
// object file at a uniquely-named path inside `workingDirectory`, and then
// llvm-size is run on that object file. The first integer on the second line
// of llvm-size's output is written to `*value`.
//
// The temporary object file is removed on every exit path.
//
// Args:
//   module: The module to lower.
//   value: Output location for the parsed size. Only written on success.
//   clangArgs: (COMPILER_GYM_EXPERIMENTAL_TEXT_SIZE_COST builds only) Extra
//     arguments appended verbatim to the clang command line. In this
//     configuration `-c` is not passed automatically.
//   workingDirectory: Directory in which the temporary object file is created.
//
// Returns:
//   Status::OK on success.
//   INVALID_ARGUMENT if the clang time-budget check trips, if either command
//     exits with a non-zero code, or if launching a process raises
//     bp::process_error.
//   DEADLINE_EXCEEDED if util::wait_for() reports false for llvm-size
//     (presumably meaning it did not finish within 60 seconds).
//   INTERNAL if the llvm-size output cannot be parsed.
//
// NOTE(review): The preprocessor conditional that the #else/#endif below
// belong to opens above the first signature.
Status getTextSizeInBytes(llvm::Module& module, int64_t* value,
                          const std::vector<std::string>& clangArgs,
                          const fs::path& workingDirectory) {
#else
Status getTextSizeInBytes(llvm::Module& module, int64_t* value, const fs::path& workingDirectory) {
#endif
  const auto clangPath = util::getSiteDataPath("llvm-v0/bin/clang");
  const auto llvmSizePath = util::getSiteDataPath("llvm-v0/bin/llvm-size");
  DCHECK(fs::exists(clangPath)) << fmt::format("File not found: {}", clangPath.string());
  DCHECK(fs::exists(llvmSizePath)) << fmt::format("File not found: {}", llvmSizePath.string());

  // Lower the module to an object file using clang and extract the .text
  // section size using llvm-size.
  const std::string ir = moduleToString(module);

  const auto tmpFile = fs::unique_path(workingDirectory / "obj-%%%%.o");

  // Scope guard that deletes the temporary object file however this function
  // exits. The error_code overload is used so that cleanup never throws, and
  // removing a file that was never created is not treated as a failure.
  struct ScopedFileRemover {
    const fs::path& path;
    ~ScopedFileRemover() {
      boost::system::error_code ignored;
      fs::remove(path, ignored);
    }
  };
  const ScopedFileRemover tmpFileRemover{tmpFile};

  std::string llvmSizeOutput;

  try {
// Use clang to compile the object file.
#ifdef COMPILER_GYM_EXPERIMENTAL_TEXT_SIZE_COST
    std::string clangCmd = fmt::format("{} -w -xir - -o {}", clangPath.string(), tmpFile.string());
    for (const auto& arg : clangArgs) {
      clangCmd += " " + arg;
    }
#else
    const std::string clangCmd =
        fmt::format("{} -w -xir - -o {} -c", clangPath.string(), tmpFile.string());
#endif

    boost::asio::io_service clangService;
    auto stdinBuffer{boost::asio::buffer(ir)};
    bp::async_pipe stdinPipe(clangService);
    boost::asio::io_context clangStderrStream;
    std::future<std::string> clangStderrFuture;

    bp::child clang(clangCmd, bp::std_in < stdinPipe, bp::std_out > bp::null,
                    bp::std_err > clangStderrFuture, clangStderrStream);

    // Write the IR to stdin, then close the pipe so that clang sees EOF.
    boost::asio::async_write(
        stdinPipe, stdinBuffer,
        [&stdinPipe](const boost::system::error_code&, std::size_t) { stdinPipe.async_close(); });

    // Drive the stdin write for at most 60 seconds. A non-zero poll() result
    // afterwards (handlers that were still waiting to run) is treated as clang
    // not having consumed its input within the time budget.
    // NOTE(review): this path reports INVALID_ARGUMENT whereas the llvm-size
    // timeout below reports DEADLINE_EXCEEDED. The code is left unchanged here
    // in case callers depend on it - confirm and consider unifying.
    clangService.run_for(std::chrono::seconds(60));
    if (clangService.poll()) {
      return Status(StatusCode::INVALID_ARGUMENT,
                    fmt::format("Failed to compute .text size cost within 60 seconds"));
    }
    clang.wait();
    clangStderrStream.run();

    if (clang.exit_code()) {
      // Not named `stderr`: that identifier is a macro in <cstdio>.
      const std::string clangStderr = clangStderrFuture.get();
      return Status(StatusCode::INVALID_ARGUMENT,
                    fmt::format("Failed to compute .text size cost. "
                                "Command returned exit code {}: {}. Error: {}",
                                clang.exit_code(), clangCmd, clangStderr));
    }

    // Run llvm-size on the compiled file.
    const std::string llvmSizeCmd = fmt::format("{} {}", llvmSizePath.string(), tmpFile.string());

    boost::asio::io_context llvmSizeStdoutStream;
    std::future<std::string> llvmSizeStdoutFuture;

    bp::child llvmSize(llvmSizeCmd, bp::std_in.close(), bp::std_out > llvmSizeStdoutFuture,
                       bp::std_err > bp::null, llvmSizeStdoutStream);

    if (!util::wait_for(llvmSize, std::chrono::seconds(60))) {
      return Status(StatusCode::DEADLINE_EXCEEDED,
                    fmt::format("Failed to compute .text size cost within 60 seconds"));
    }

    llvmSizeStdoutStream.run();
    if (llvmSize.exit_code()) {
      return Status(StatusCode::INVALID_ARGUMENT, fmt::format("Failed to compute .text size cost. "
                                                              "Command returned exit code {}: {}",
                                                              llvmSize.exit_code(), llvmSizeCmd));
    }

    llvmSizeOutput = llvmSizeStdoutFuture.get();
  } catch (const bp::process_error& e) {
    return Status(StatusCode::INVALID_ARGUMENT,
                  fmt::format("Failed to compute .text size cost: {}", e.what()));
  }

  // The output of llvm-size is in Berkeley format, e.g.:
  //
  //     $ llvm-size foo.o
  //     __TEXT __DATA __OBJC others dec hex
  //     127    0      0      32     159 9f
  //
  // Skip the first line of output and read an integer from the start of the
  // second line. The field is delimited by the first tab after the newline.
  const size_t eol = llvmSizeOutput.find('\n');
  const size_t tab =
      eol == std::string::npos ? std::string::npos : llvmSizeOutput.find('\t', eol + 1);
  if (eol == std::string::npos || tab == std::string::npos) {
    return Status(StatusCode::INTERNAL,
                  fmt::format("Failed to parse .TEXT size: `{}`\n", llvmSizeOutput));
  }
  const std::string extracted = llvmSizeOutput.substr(eol + 1, tab - eol - 1);
  try {
    // stoll rather than stoi: the destination is 64 bits wide, so values that
    // do not fit in an int must not be rejected or truncated.
    *value = std::stoll(extracted);
  } catch (const std::exception&) {
    return Status(StatusCode::INTERNAL,
                  fmt::format("Failed to parse .TEXT size: `{}`\n", llvmSizeOutput));
  }
  return Status::OK;
}

// Flatten a (policy, cost) pair into a single index, computed as
// `enum_count<LlvmCostFunction>() * policy + cost`: the policy selects a
// stride of one slot per cost function, and the cost is the offset within it.
// Presumably used to index a flat table of baseline costs - confirm at the
// call sites.
inline size_t getBaselineCostIndex(LlvmBaselinePolicy policy, LlvmCostFunction cost) {
  const auto costFunctionCount = static_cast<size_t>(magic_enum::enum_count<LlvmCostFunction>());
  const auto policyOrdinal = static_cast<size_t>(policy);
  const auto costOrdinal = static_cast<size_t>(cost);
  return costFunctionCount * policyOrdinal + costOrdinal;
}

}  // anonymous namespace