setup/advancedConfig.yaml (115 lines of code) (raw):
# MUST READ :
# If you are creating a blobfuse2 config file from this template, please take note of the points below:
# 1. All boolean configs (true|false config) (except ignore-open-flags, virtual-directory) are set to 'false' by default.
# No need to mention them in your config file unless you are setting them to true.
# 2. 'loopbackfs' is purely for testing and shall not be used in production configuration.
# 3. 'block_cache' and 'file_cache' cannot co-exist and the config file shall have only one of them based on your use case.
# 4. By default the log level is set to 'log_warning' and logs are redirected to syslog.
# Either use 'base' logging or syslog filters to redirect logs to separate file.
# To install syslog filter follow below steps:
# sudo cp setup/11-blobfuse2.conf /etc/rsyslog.d/
# sudo cp setup/blobfuse2-logrotate /etc/logrotate.d/
# sudo service rsyslog restart
# 5. For non-HNS (flat namespace) accounts blobfuse expects special directory marker files to
# exist in the container to identify a directory.
# If these files do not exist in the container, then 'virtual-directory: true' in the 'azstorage' section is required.
# 6. By default 'writeback-cache' is enabled for libfuse3 and this may cause append/write operations to fail.
# You can either disable 'writeback-cache', which might hurt performance,
# or configure blobfuse2 to ignore the open flags given by the user so that it works with 'writeback-cache'.
# The 'libfuse' section below has both the configurations.
# 7. If you are using the 'allow-other: true' config then make sure user_allow_other is enabled in the /etc/fuse.conf file as
# well, otherwise the mount will fail. By default /etc/fuse.conf has this option disabled; you just need to
# enable it and save the file.
# 8. If data in your storage account (non-HNS) is created using Blobfuse or AzCopy then there are marker files present
# in your container to mark a directory. In such cases you can optimize your listing by setting 'virtual-directory'
# flag to false in mount command.
# 9. If you are using 'file_cache' component then make sure you have enough disk space available for cache.
# 10. 'sdk-trace' has been removed with v2.3.0 release and setting log level to log_debug will auto enable these logs.
# -----------------------------------------------------------------------------------------------------------------------
# Daemon configuration
foreground: true|false <run blobfuse2 in foreground or background>
# Common configurations
allow-other: true|false <allow other users to access the mounted directory - used for FUSE and File Cache>
nonempty: true|false <allow mounting on non-empty directory>
# Dynamic profiler related configuration. This helps to root-cause high memory/cpu usage related issues.
dynamic-profile: true|false <allows to turn on dynamic profiler for cpu/memory usage monitoring. Only for debugging, shall not be used in production>
profiler-port: <port number for dynamic-profiler to listen for REST calls. Default - 6060>
profiler-ip: <IP address for dynamic-profiler to listen for REST calls. Default - localhost>
# Logger configuration
logging:
type: syslog|silent|base <type of logger to be used by the system. silent = no logger, base = file based logger. Default - syslog>
level: log_off|log_crit|log_err|log_warning|log_info|log_trace|log_debug <log level. Default - log_warning> <log_debug will also enable sdk-trace logs>
file-path: <path where log files shall be stored. Default - '$HOME/.blobfuse2/blobfuse2.log'>
max-file-size-mb: <maximum allowed size for each log file (in MB). Default - 512 MB>
file-count: <maximum number of files to be rotated to preserve old logs. Default - 10>
# Pipeline configuration. Choose components to be engaged. The order below is the priority order that needs to be followed.
components:
- libfuse
- entry_cache
- xload
- block_cache
- file_cache
- attr_cache
- azstorage
- loopbackfs
# Libfuse configuration
libfuse:
default-permission: 0777|0666|0644|0444 <default permissions to be presented for block blobs>
attribute-expiration-sec: <time kernel can cache inode attributes (in sec). Default - 120 sec>
entry-expiration-sec: <time kernel can cache directory listing attributes (in sec). Default - 120 sec>
negative-entry-expiration-sec: <time kernel can cache attributes of non existent paths (in sec). Default - 120 sec>
fuse-trace: true|false <enable libfuse api trace logs for debugging>
extension: <physical path to extension library>
direct-io: true|false <enable to bypass the kernel cache>
# Entry Cache configuration
entry_cache:
timeout-sec: <cache eviction timeout (in sec). Default - 30 sec>
# Xload configuration
xload:
block-size-mb: <size of each block to be cached in memory (in MB). Default - 16 MB>
path: <path to local disk cache where downloaded files will be stored>
export-progress: <preload progress will be exported to a json file. Default output file is '~/.blobfuse2/xload_stats_{PID}.json'. Default - not exported>
validate-md5: <if md5 sum is present in the blob, validate it post download. Default - false>
# Block cache related configuration
block_cache:
block-size-mb: <size of each block to be cached in memory (in MB). Default - 16 MB>
mem-size-mb: <total amount of memory to be preallocated for block cache (in MB). Default - 80% of free memory>
path: <path to local disk cache where downloaded blocks will be stored>
disk-size-mb: <maximum disk cache size allowed. Default - 80% of free disk space>
disk-timeout-sec: <default disk cache eviction timeout (in sec). Default - 120 sec>
prefetch: <number of blocks to be prefetched in serial read case. Min - 11, Default - 2 times number of CPU cores>
parallelism: <number of parallel threads downloading the data and writing to disk cache. Default - 3 times number of CPU cores>
# Disk cache related configuration
file_cache:
# Required
path: <path to local disk cache>
# Optional
timeout-sec: <default cache eviction timeout (in sec). Default - 120 sec>
max-size-mb: <maximum cache size allowed. Default - 80% of free disk space>
allow-non-empty-temp: true|false <allow non empty temp directory at startup>
cleanup-on-start: true|false <cleanup the temp directory on startup, if it's not empty>
sync-to-flush: true|false <sync call to a file will force upload of the contents to storage account>
refresh-sec: <number of seconds after which compare lmt of file in local cache and container and refresh file if container has the latest copy>
ignore-sync: true|false <sync call will be ignored and locally cached file will not be deleted>
hard-limit: true|false <if set to true, file-cache will not allow read/writes to file which exceed the configured limits>
# Attribute cache related configuration
attr_cache:
timeout-sec: <time attributes can be cached (in sec). Default - 120 sec>
no-symlinks: true|false <to improve performance disable symlink support. symlinks will be treated like regular files.>
# Loopback configuration
loopbackfs:
path: <path to local directory>
# Azure storage configuration
azstorage:
# Required
type: block|adls <type of storage account to be connected. Default - block>
account-name: <name of the storage account>
container: <name of the storage container to be mounted>
endpoint: <specify this parameter only if storage account is behind a private endpoint>
mode: key|sas|spn|msi|azcli <kind of authentication to be used>
account-key: <storage account key>
# OR
sas: <storage account sas>
# OR
appid: <storage account app id / client id for MSI>
resid: <storage account resource id for MSI>
objid: <object id for MSI - needs Azure CLI on system>
# OR
tenantid: <storage account tenant id for SPN>
clientid: <storage account client id for SPN>
clientsecret: <storage account client secret for SPN>
oauth-token-path: <path to file containing the OAuth token>
workload-identity-token: <service account token for workload identity>
# Optional
use-http: true|false <use http instead of https for storage connection>
aadendpoint: <storage account custom aad endpoint>
subdirectory: <name of subdirectory to be mounted instead of whole container>
block-size-mb: <size of each block (in MB). Default - 16 MB>
max-concurrency: <number of parallel upload/download threads. Default - 32>
tier: hot|cool|cold|premium|archive|none <blob-tier to be set while uploading a blob. Default - none>
block-list-on-mount-sec: <time list api to be blocked after mount (in sec). Default - 0 sec>
max-retries: <number of retries to attempt for any operation failure. Default - 5>
max-retry-timeout-sec: <maximum timeout allowed for a given retry (in sec). Default - 900 sec>
retry-backoff-sec: <retry backoff between two tries (in sec). Default - 4 sec>
max-retry-delay-sec: <maximum delay between two tries (in sec). Default - 60 sec>
http-proxy: ip-address:port <http proxy to be used for connection>
https-proxy: ip-address:port <https proxy to be used for connection>
fail-unsupported-op: true|false <for block blob account return failure for unsupported operations like chmod and chown>
auth-resource: <resource string to be used during OAuth token retrieval>
update-md5: true|false <set md5 sum on upload. Impacts performance. works only when file-cache component is part of the pipeline>
validate-md5: true|false <validate md5 on download. Impacts performance. works only when file-cache component is part of the pipeline>
disable-compression: true|false <disable transport layer content encoding like gzip, set this flag to true if blobs have content-encoding set in container>
telemetry: <additional information that customer wants to push in user-agent>
honour-acl: true|false <honour ACLs on files and directories when mounted using MSI Auth and object-ID is provided in config>
cpk-enabled: true|false <enable client provided key encryption>
cpk-encryption-key: <customer provided base64-encoded AES-256 encryption key value>
cpk-encryption-key-sha256: <customer provided base64-encoded sha256 of the encryption key>
preserve-acl: true|false <preserve ACLs and Permissions set on file during updates>
# Mount all configuration
mountall:
# allowlist takes precedence over denylist in case of conflicts
container-allowlist:
- <list of containers to be mounted>
container-denylist:
- <list of containers not to be mounted>
# Health Monitor configuration
health_monitor:
enable-monitoring: true|false <enable health monitor>
stats-poll-interval-sec: <Blobfuse2 stats polling interval (in sec). Default - 10 sec>
process-monitor-interval-sec: <CPU, memory and network usage polling interval (in sec). Default - 30 sec>
output-path: <Path where health monitor will generate its output file. File name will be monitor_<pid>.json>
# list of monitors to be disabled
monitor-disable-list:
- blobfuse_stats <Disable blobfuse2 stats polling>
- file_cache_monitor <Disable file cache directory monitor>
- cpu_profiler <Disable CPU monitoring on blobfuse2 process>
- memory_profiler <Disable memory monitoring on blobfuse2 process>
- network_profiler <Disable network monitoring on blobfuse2 process>