in pyiceberg/io/pyarrow.py [0:0]
def _initialize_s3_fs(self, netloc: Optional[str]) -> FileSystem:
    """Create a PyArrow ``S3FileSystem`` configured from this FileIO's properties.

    The region passed to the filesystem is either the one found in the
    properties or one looked up for the bucket. The lookup runs when no region
    is configured, or when ``S3_RESOLVE_REGION`` is enabled; if the lookup
    yields a falsy result the configured region is used instead.

    Args:
        netloc: Network location of the URI being accessed. It is passed as
            the bucket name to the region lookup and appears in the warning
            logged when the looked-up region differs from the configured one.

    Returns:
        An ``S3FileSystem`` built from the collected client options.
    """
    from pyarrow.fs import S3FileSystem

    props = self.properties
    provided_region = get_first_property_value(props, S3_REGION, AWS_REGION)

    # Default to the configured region; the lookup below may replace it.
    bucket_region = provided_region

    # Look the region up when none is configured, or when resolution is explicitly enabled.
    if provided_region is None or property_as_bool(props, S3_RESOLVE_REGION, False) is True:
        # Resolution is only supported by buckets hosted by S3, hence the
        # fallback to the configured region.
        bucket_region = _cached_resolve_s3_region(bucket=netloc) or provided_region
        region_overridden = provided_region is not None and bucket_region != provided_region
        if region_overridden:
            logger.warning(
                f"PyArrow FileIO overriding S3 bucket region for bucket {netloc}: "
                f"provided region {provided_region}, actual region {bucket_region}"
            )

    # Options that are always passed, even when their value is None.
    endpoint = props.get(S3_ENDPOINT)
    access_key = get_first_property_value(props, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID)
    secret_key = get_first_property_value(props, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY)
    session_token = get_first_property_value(props, S3_SESSION_TOKEN, AWS_SESSION_TOKEN)
    s3_options: Dict[str, Any] = {
        "endpoint_override": endpoint,
        "access_key": access_key,
        "secret_key": secret_key,
        "session_token": session_token,
        "region": bucket_region,
    }

    # The remaining options are only passed when the property holds a truthy value.
    proxy_uri = props.get(S3_PROXY_URI)
    if proxy_uri:
        s3_options["proxy_options"] = proxy_uri

    # Timeouts are converted to float before being handed to PyArrow.
    for option_name, property_key in (
        ("connect_timeout", S3_CONNECT_TIMEOUT),
        ("request_timeout", S3_REQUEST_TIMEOUT),
    ):
        raw_timeout = props.get(property_key)
        if raw_timeout:
            s3_options[option_name] = float(raw_timeout)

    # Role settings accept either the S3-specific or the generic AWS property.
    for option_name, property_keys in (
        ("role_arn", (S3_ROLE_ARN, AWS_ROLE_ARN)),
        ("session_name", (S3_ROLE_SESSION_NAME, AWS_ROLE_SESSION_NAME)),
    ):
        role_value = get_first_property_value(props, *property_keys)
        if role_value:
            s3_options[option_name] = role_value

    # Presence (not truthiness) decides whether the flag is forwarded, so an
    # explicitly configured false value still reaches the filesystem.
    if props.get(S3_FORCE_VIRTUAL_ADDRESSING) is not None:
        s3_options["force_virtual_addressing"] = property_as_bool(props, S3_FORCE_VIRTUAL_ADDRESSING, False)

    return S3FileSystem(**s3_options)