in src/controlnet_aux/zoe/zoedepth/models/zoedepth/zoedepth_v1.py [0:0]
def __init__(self, core, n_bins=64, bin_centers_type="softplus", bin_embedding_dim=128, min_depth=1e-3, max_depth=10,
n_attractors=[16, 8, 4, 1], attractor_alpha=300, attractor_gamma=2, attractor_kind='sum', attractor_type='exp', min_temp=5, max_temp=50, train_midas=True,
midas_lr_factor=10, encoder_lr_factor=10, pos_enc_lr_factor=10, inverse_midas=False, **kwargs):
"""ZoeDepth model. This is the version of ZoeDepth that has a single metric head
Args:
core (models.base_models.midas.MidasCore): The base midas model that is used for extraction of "relative" features
n_bins (int, optional): Number of bin centers. Defaults to 64.
bin_centers_type (str, optional): "normed" or "softplus". Activation type used for bin centers. For "normed" bin centers, linear normalization trick is applied. This results in bounded bin centers.
For "softplus", softplus activation is used and thus are unbounded. Defaults to "softplus".
bin_embedding_dim (int, optional): bin embedding dimension. Defaults to 128.
min_depth (float, optional): Lower bound for normed bin centers. Defaults to 1e-3.
max_depth (float, optional): Upper bound for normed bin centers. Defaults to 10.
n_attractors (List[int], optional): Number of bin attractors at decoder layers. Defaults to [16, 8, 4, 1].
attractor_alpha (int, optional): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 300.
attractor_gamma (int, optional): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2.
attractor_kind (str, optional): Attraction aggregation "sum" or "mean". Defaults to 'sum'.
attractor_type (str, optional): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'exp'.
min_temp (int, optional): Lower bound for temperature of output probability distribution. Defaults to 5.
max_temp (int, optional): Upper bound for temperature of output probability distribution. Defaults to 50.
train_midas (bool, optional): Whether to train "core", the base midas model. Defaults to True.
midas_lr_factor (int, optional): Learning rate reduction factor for base midas model except its encoder and positional encodings. Defaults to 10.
encoder_lr_factor (int, optional): Learning rate reduction factor for the encoder in midas model. Defaults to 10.
pos_enc_lr_factor (int, optional): Learning rate reduction factor for positional encodings in the base midas model. Defaults to 10.
"""
super().__init__()
self.core = core
self.max_depth = max_depth
self.min_depth = min_depth
self.min_temp = min_temp
self.bin_centers_type = bin_centers_type
self.midas_lr_factor = midas_lr_factor
self.encoder_lr_factor = encoder_lr_factor
self.pos_enc_lr_factor = pos_enc_lr_factor
self.train_midas = train_midas
self.inverse_midas = inverse_midas
if self.encoder_lr_factor <= 0:
self.core.freeze_encoder(
freeze_rel_pos=self.pos_enc_lr_factor <= 0)
N_MIDAS_OUT = 32
btlnck_features = self.core.output_channels[0]
num_out_features = self.core.output_channels[1:]
self.conv2 = nn.Conv2d(btlnck_features, btlnck_features,
kernel_size=1, stride=1, padding=0) # btlnck conv
if bin_centers_type == "normed":
SeedBinRegressorLayer = SeedBinRegressor
Attractor = AttractorLayer
elif bin_centers_type == "softplus":
SeedBinRegressorLayer = SeedBinRegressorUnnormed
Attractor = AttractorLayerUnnormed
elif bin_centers_type == "hybrid1":
SeedBinRegressorLayer = SeedBinRegressor
Attractor = AttractorLayerUnnormed
elif bin_centers_type == "hybrid2":
SeedBinRegressorLayer = SeedBinRegressorUnnormed
Attractor = AttractorLayer
else:
raise ValueError(
"bin_centers_type should be one of 'normed', 'softplus', 'hybrid1', 'hybrid2'")
self.seed_bin_regressor = SeedBinRegressorLayer(
btlnck_features, n_bins=n_bins, min_depth=min_depth, max_depth=max_depth)
self.seed_projector = Projector(btlnck_features, bin_embedding_dim)
self.projectors = nn.ModuleList([
Projector(num_out, bin_embedding_dim)
for num_out in num_out_features
])
self.attractors = nn.ModuleList([
Attractor(bin_embedding_dim, n_bins, n_attractors=n_attractors[i], min_depth=min_depth, max_depth=max_depth,
alpha=attractor_alpha, gamma=attractor_gamma, kind=attractor_kind, attractor_type=attractor_type)
for i in range(len(num_out_features))
])
last_in = N_MIDAS_OUT + 1 # +1 for relative depth
# use log binomial instead of softmax
self.conditional_log_binomial = ConditionalLogBinomial(
last_in, bin_embedding_dim, n_classes=n_bins, min_temp=min_temp, max_temp=max_temp)