threestudio/models/guidance/stable_diffusion_unified_guidance.py [142:189]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            if self.cfg.vsd_use_camera_condition:
                # Input width of the camera embedding depends on the configured
                # condition type: 16 for "extrinsics"/"mvp", 4 for "spherical".
                camera_condition_type = self.cfg.vsd_camera_condition_type
                if camera_condition_type == "spherical":
                    self.camera_embedding_dim = 4
                elif camera_condition_type in ("extrinsics", "mvp"):
                    self.camera_embedding_dim = 16
                else:
                    raise ValueError("Invalid camera condition type!")

                # FIXME: hard-coded output dim
                camera_embedder = TimestepEmbedding(self.camera_embedding_dim, 1280)
                self.camera_embedding = ToDTypeWrapper(
                    camera_embedder, self.weights_dtype
                ).to(self.device)
                # Install the camera embedding as the phi UNet's class_embedding.
                pipe_phi.unet.class_embedding = self.camera_embedding

            if self.cfg.vsd_use_lora:
                # Set up LoRA layers: create one LoRAAttnProcessor per attention
                # processor of the phi UNet, sized to the block it belongs to.
                unet_config = pipe_phi.unet.config
                block_out_channels = unet_config.block_out_channels
                lora_attn_procs = {}
                for name in pipe_phi.unet.attn_processors.keys():
                    # "attn1" processors get no cross-attention dimension; all
                    # others use the UNet's configured cross_attention_dim.
                    cross_attention_dim = (
                        None
                        if name.endswith("attn1.processor")
                        else unet_config.cross_attention_dim
                    )
                    if name.startswith("mid_block"):
                        hidden_size = block_out_channels[-1]
                    elif name.startswith("up_blocks"):
                        # The block index is the dotted segment right after the
                        # prefix; parse the whole segment so indices >= 10 work.
                        block_id = int(name.split(".")[1])
                        hidden_size = list(reversed(block_out_channels))[block_id]
                    elif name.startswith("down_blocks"):
                        block_id = int(name.split(".")[1])
                        hidden_size = block_out_channels[block_id]
                    else:
                        # Without this branch hidden_size would be unbound on the
                        # first iteration or silently reuse the previous value.
                        raise ValueError(
                            f"Unexpected attention processor name: {name}"
                        )

                    lora_attn_procs[name] = LoRAAttnProcessor(
                        hidden_size=hidden_size, cross_attention_dim=cross_attention_dim
                    )

                pipe_phi.unet.set_attn_processor(lora_attn_procs)

                self.lora_layers = AttnProcsLayers(pipe_phi.unet.attn_processors).to(
                    self.device
                )
                # Drop the state-dict hooks registered on the wrapper.
                # NOTE(review): presumably so this module saves/loads with its
                # plain parameter keys -- confirm against checkpoint handling.
                self.lora_layers._load_state_dict_pre_hooks.clear()
                self.lora_layers._state_dict_hooks.clear()

        # Plain string literal: the message has no placeholders to interpolate.
        threestudio.info("Loaded Stable Diffusion!")
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



threestudio/models/guidance/zero123_unified_guidance.py [130:177]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            if self.cfg.vsd_use_camera_condition:
                # Input width of the camera embedding depends on the configured
                # condition type: 16 for "extrinsics"/"mvp", 4 for "spherical".
                camera_condition_type = self.cfg.vsd_camera_condition_type
                if camera_condition_type == "spherical":
                    self.camera_embedding_dim = 4
                elif camera_condition_type in ("extrinsics", "mvp"):
                    self.camera_embedding_dim = 16
                else:
                    raise ValueError("Invalid camera condition type!")

                # FIXME: hard-coded output dim
                camera_embedder = TimestepEmbedding(self.camera_embedding_dim, 1280)
                self.camera_embedding = ToDTypeWrapper(
                    camera_embedder, self.weights_dtype
                ).to(self.device)
                # Install the camera embedding as the phi UNet's class_embedding.
                pipe_phi.unet.class_embedding = self.camera_embedding

            if self.cfg.vsd_use_lora:
                # Set up LoRA layers: create one LoRAAttnProcessor per attention
                # processor of the phi UNet, sized to the block it belongs to.
                unet_config = pipe_phi.unet.config
                block_out_channels = unet_config.block_out_channels
                lora_attn_procs = {}
                for name in pipe_phi.unet.attn_processors.keys():
                    # "attn1" processors get no cross-attention dimension; all
                    # others use the UNet's configured cross_attention_dim.
                    cross_attention_dim = (
                        None
                        if name.endswith("attn1.processor")
                        else unet_config.cross_attention_dim
                    )
                    if name.startswith("mid_block"):
                        hidden_size = block_out_channels[-1]
                    elif name.startswith("up_blocks"):
                        # The block index is the dotted segment right after the
                        # prefix; parse the whole segment so indices >= 10 work.
                        block_id = int(name.split(".")[1])
                        hidden_size = list(reversed(block_out_channels))[block_id]
                    elif name.startswith("down_blocks"):
                        block_id = int(name.split(".")[1])
                        hidden_size = block_out_channels[block_id]
                    else:
                        # Without this branch hidden_size would be unbound on the
                        # first iteration or silently reuse the previous value.
                        raise ValueError(
                            f"Unexpected attention processor name: {name}"
                        )

                    lora_attn_procs[name] = LoRAAttnProcessor(
                        hidden_size=hidden_size, cross_attention_dim=cross_attention_dim
                    )

                pipe_phi.unet.set_attn_processor(lora_attn_procs)

                self.lora_layers = AttnProcsLayers(pipe_phi.unet.attn_processors).to(
                    self.device
                )
                # Drop the state-dict hooks registered on the wrapper.
                # NOTE(review): presumably so this module saves/loads with its
                # plain parameter keys -- confirm against checkpoint handling.
                self.lora_layers._load_state_dict_pre_hooks.clear()
                self.lora_layers._state_dict_hooks.clear()

        # Plain string literal: the message has no placeholders to interpolate.
        # NOTE(review): this file is the Zero123 guidance but the message names
        # Stable Diffusion -- confirm the wording is intended.
        threestudio.info("Loaded Stable Diffusion!")
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



