Status AzureOptions::ExtractFromUriQuery()

in cpp/src/arrow/filesystem/azurefs.cc [103:233]


// Applies the query parameters of `uri` to this AzureOptions instance.
//
// Recognized parameters:
//   - blob_storage_authority / dfs_storage_authority: stored as-is.
//   - credential_kind: one of "default", "anonymous", "cli",
//     "workload_identity" or "environment"; any other value is rejected.
//   - tenant_id / client_id / client_secret: only allowed when no credential
//     kind was selected; they are then used to infer the credential (see below).
//   - enable_tls: boolean; sets both storage schemes to "https" or "http".
//   - background_writes: boolean; stored in `background_writes`.
//   - any SAS token parameter (see `sas_token_query_parameters`): selects the
//     SAS token credential, which receives the whole query string.
//
// When several parameters select a credential kind, the one appearing last in
// the query wins.
//
// When no credential kind was selected:
//   - none of tenant_id/client_id/client_secret given: the anonymous credential
//     is configured if `account_name` is empty, otherwise nothing is changed;
//   - only client_id given: managed identity credential;
//   - all three given: client secret credential;
//   - any other combination is an error.
// A parameter given with an empty value counts as "not given" for these checks.
//
// Returns Status::Invalid for an unknown parameter, an unknown credential_kind
// value or an inconsistent parameter combination, and propagates any error from
// query parsing, boolean parsing or the Configure*Credential() calls.
Status AzureOptions::ExtractFromUriQuery(const Uri& uri) {
  std::optional<CredentialKind> credential_kind;
  // Human-readable name of the selected credential kind, used only in error
  // messages. Invariant: has a value whenever `credential_kind` has one.
  std::optional<std::string> credential_kind_value;
  std::string tenant_id;
  std::string client_id;
  std::string client_secret;

  // These query parameters are the union of the following docs:
  // https://learn.microsoft.com/en-us/rest/api/storageservices/create-account-sas#specify-the-account-sas-parameters
  // https://learn.microsoft.com/en-us/rest/api/storageservices/create-service-sas#construct-a-service-sas
  // (excluding parameters for table storage only)
  // https://learn.microsoft.com/en-us/rest/api/storageservices/create-user-delegation-sas#construct-a-user-delegation-sas
  static const std::set<std::string> sas_token_query_parameters = {
      "sv",    "ss",    "sr",  "st",  "se",   "sp",   "si",   "sip",   "spr",
      "skoid", "sktid", "srt", "skt", "ske",  "skv",  "sks",  "saoid", "suoid",
      "scid",  "sdd",   "ses", "sig", "rscc", "rscd", "rsce", "rscl",  "rsct",
  };

  ARROW_ASSIGN_OR_RAISE(const auto options_items, uri.query_items());
  for (const auto& kv : options_items) {
    if (kv.first == "blob_storage_authority") {
      blob_storage_authority = kv.second;
    } else if (kv.first == "dfs_storage_authority") {
      dfs_storage_authority = kv.second;
    } else if (kv.first == "credential_kind") {
      if (kv.second == "default") {
        credential_kind = CredentialKind::kDefault;
      } else if (kv.second == "anonymous") {
        credential_kind = CredentialKind::kAnonymous;
      } else if (kv.second == "cli") {
        credential_kind = CredentialKind::kCLI;
      } else if (kv.second == "workload_identity") {
        credential_kind = CredentialKind::kWorkloadIdentity;
      } else if (kv.second == "environment") {
        credential_kind = CredentialKind::kEnvironment;
      } else {
        // Other credential kinds should be inferred from the given
        // parameters automatically.
        return Status::Invalid("Unexpected credential_kind: '", kv.second, "'");
      }
      credential_kind_value = kv.second;
    } else if (kv.first == "tenant_id") {
      tenant_id = kv.second;
    } else if (kv.first == "client_id") {
      client_id = kv.second;
    } else if (kv.first == "client_secret") {
      client_secret = kv.second;
    } else if (kv.first == "enable_tls") {
      ARROW_ASSIGN_OR_RAISE(auto enable_tls, ::arrow::internal::ParseBoolean(kv.second));
      if (enable_tls) {
        blob_storage_scheme = "https";
        dfs_storage_scheme = "https";
      } else {
        blob_storage_scheme = "http";
        dfs_storage_scheme = "http";
      }
    } else if (kv.first == "background_writes") {
      ARROW_ASSIGN_OR_RAISE(background_writes,
                            ::arrow::internal::ParseBoolean(kv.second));
    } else if (sas_token_query_parameters.find(kv.first) !=
               sas_token_query_parameters.end()) {
      credential_kind = CredentialKind::kSASToken;
      // The kind was selected implicitly, without a credential_kind parameter.
      // Record a label anyway: the conflict checks below dereference
      // credential_kind_value, which would be undefined behavior if it were
      // left empty (e.g. for "?sig=...&tenant_id=...").
      credential_kind_value = "sas_token";
    } else {
      return Status::Invalid(
          "Unexpected query parameter in Azure Blob File System URI: '", kv.first, "'");
    }
  }

  if (credential_kind) {
    // An explicitly or implicitly selected credential kind is incompatible with
    // the parameters used for credential inference.
    if (!tenant_id.empty()) {
      return Status::Invalid("tenant_id must not be specified with credential_kind=",
                             *credential_kind_value);
    }
    if (!client_id.empty()) {
      return Status::Invalid("client_id must not be specified with credential_kind=",
                             *credential_kind_value);
    }
    if (!client_secret.empty()) {
      return Status::Invalid("client_secret must not be specified with credential_kind=",
                             *credential_kind_value);
    }

    switch (*credential_kind) {
      case CredentialKind::kAnonymous:
        RETURN_NOT_OK(ConfigureAnonymousCredential());
        break;
      case CredentialKind::kCLI:
        RETURN_NOT_OK(ConfigureCLICredential());
        break;
      case CredentialKind::kWorkloadIdentity:
        RETURN_NOT_OK(ConfigureWorkloadIdentityCredential());
        break;
      case CredentialKind::kEnvironment:
        RETURN_NOT_OK(ConfigureEnvironmentCredential());
        break;
      case CredentialKind::kSASToken:
        // Reconstructing the SAS token without the other URI query parameters is awkward
        // because some parts are URI escaped and some parts are not. Instead we just
        // pass through the entire query string and Azure ignores the extra query
        // parameters.
        RETURN_NOT_OK(ConfigureSASCredential("?" + uri.query_string()));
        break;
      default:
        // Default credential
        break;
    }
  } else {
    if (tenant_id.empty() && client_id.empty() && client_secret.empty()) {
      // No related parameters
      if (account_name.empty()) {
        RETURN_NOT_OK(ConfigureAnonymousCredential());
      } else {
        // Default credential
      }
    } else {
      // One or more tenant_id, client_id or client_secret are specified
      if (client_id.empty()) {
        return Status::Invalid("client_id must be specified");
      }
      if (tenant_id.empty() && client_secret.empty()) {
        // client_id alone selects the managed identity credential.
        RETURN_NOT_OK(ConfigureManagedIdentityCredential(client_id));
      } else if (!tenant_id.empty() && !client_secret.empty()) {
        RETURN_NOT_OK(
            ConfigureClientSecretCredential(tenant_id, client_id, client_secret));
      } else {
        // Exactly one of tenant_id / client_secret is missing.
        return Status::Invalid("Both of tenant_id and client_secret must be specified");
      }
    }
  }
  return Status::OK();
}