bool AdamFullCost::Evaluate()

in visualization/FitAdam/src/AdamFastCost.cpp [326:977]


bool AdamFullCost::Evaluate(double const* const* parameters,
        double* residuals,
        double** jacobians) const
{
// const auto start_iter = std::chrono::high_resolution_clock::now();
    using namespace Eigen;
    typedef double T;
    const T* t = parameters[0];
    const T* p_eulers = parameters[1];
    const T* c = parameters[2];
    const T* face_coeff = fit_face_exp? parameters[3]: nullptr;

    Map< const Vector3d > t_vec(t);
    Map< const Matrix<double, Dynamic, 1> > c_bodyshape(c, TotalModel::NUM_SHAPE_COEFFICIENTS);

    // 0st step: Compute all the current joints
    Matrix<double, TotalModel::NUM_JOINTS, 3, RowMajor> J;
    Map< Matrix<double, Dynamic, 1> > J_vec(J.data(), TotalModel::NUM_JOINTS * 3);
    J_vec = fit_data_.adam.J_mu_ + fit_data_.adam.dJdc_ * c_bodyshape;

    // 1st step: forward kinematics
    const int num_t = (TotalModel::NUM_JOINTS) * 3 * 5;  // transform 3 * 4 + joint location 3 * 1

    // Matrix<double, Dynamic, 3 * TotalModel::NUM_JOINTS, RowMajor> old_dTrdP(num_t, 3 * TotalModel::NUM_JOINTS);
    // Matrix<double, Dynamic, 3 * TotalModel::NUM_JOINTS, RowMajor> old_dTrdJ(num_t, 3 * TotalModel::NUM_JOINTS);
    // old_dTrdP.setZero(); old_dTrdJ.setZero();
    // VectorXd old_transforms_joint(3 * TotalModel::NUM_JOINTS * 4 + 3 * TotalModel::NUM_JOINTS); // the first part is transform, the second part is outJoint
    // ceres::AutoDiffCostFunction<smpl::PoseToTransformsNoLR_Eulers_adamModel,
    // (TotalModel::NUM_JOINTS) * 3 * 4 + 3 * TotalModel::NUM_JOINTS,
    // (TotalModel::NUM_JOINTS) * 3,
    // (TotalModel::NUM_JOINTS) * 3> old_p2t(new smpl::PoseToTransformsNoLR_Eulers_adamModel(fit_data_.adam));
    // const double* old_p2t_parameters[2] = { p_eulers, J.data() };
    // double* old_p2t_residuals = old_transforms_joint.data();
    // double* old_p2t_jacobians[2] = { old_dTrdP.data(), old_dTrdJ.data() };
    // old_p2t.Evaluate(old_p2t_parameters, old_p2t_residuals, old_p2t_jacobians);

    Matrix<double, Dynamic, 3 * TotalModel::NUM_JOINTS, RowMajor> dTrdP(num_t, 3 * TotalModel::NUM_JOINTS);
    Matrix<double, Dynamic, 3 * TotalModel::NUM_JOINTS, RowMajor> dTrdJ(num_t, 3 * TotalModel::NUM_JOINTS);

    VectorXd transforms_joint(3 * TotalModel::NUM_JOINTS * 4 + 3 * TotalModel::NUM_JOINTS);
    const double* p2t_parameters[2] = { p_eulers, J.data() };
    double* p2t_residuals = transforms_joint.data();
    double* p2t_jacobians[2] = { dTrdP.data(), dTrdJ.data() };

    smpl::PoseToTransform_AdamFull_withDiff p2t(fit_data_.adam, parentIndexes, euler_);
// const auto start_FK = std::chrono::high_resolution_clock::now();
    p2t.Evaluate(p2t_parameters, p2t_residuals, jacobians ? p2t_jacobians : nullptr );
// const auto duration_FK = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now() - start_FK).count();

    // std::cout << "J" << std::endl;
    // std::cout << "max diff: " << (old_transforms_joint - transforms_joint).maxCoeff() << std::endl;
    // std::cout << "min diff: " << (old_transforms_joint - transforms_joint).minCoeff() << std::endl;
    // std::cout << "dJdP" << std::endl;
    // std::cout << "max diff: " << (old_dTrdP - dTrdP).maxCoeff() << std::endl;
    // std::cout << "min diff: " << (old_dTrdP - dTrdP).minCoeff() << std::endl;
    // std::cout << "dJdJ" << std::endl;
    // std::cout << "max diff: " << (old_dTrdJ - dTrdJ).maxCoeff() << std::endl;
    // std::cout << "min diff: " << (old_dTrdJ - dTrdJ).minCoeff() << std::endl;

// const auto start_transJ = std::chrono::high_resolution_clock::now();
    // MatrixXdr dTJdP = dTrdP.block(3 * TotalModel::NUM_JOINTS * 4, 0, 3 * TotalModel::NUM_JOINTS, 3 * TotalModel::NUM_JOINTS);
    Map<MatrixXdr> dTJdP(dTrdP.data() + 3 * TotalModel::NUM_JOINTS * 4 * 3 * TotalModel::NUM_JOINTS, 3 * TotalModel::NUM_JOINTS, 3 * TotalModel::NUM_JOINTS);
    // The following lines are equiv to MatrixXdr dTrdc = dTrdJ * fit_data_.adam.dJdc_;
    MatrixXdr dTrdc(num_t, TotalModel::NUM_SHAPE_COEFFICIENTS);
    if (jacobians) ComputedTrdc(dTrdJ.data(), fit_data_.adam.dJdc_.data(), dTrdc.data(), parentIndexes);
    // MatrixXdr dTJdc = dTrdc.block(3 * TotalModel::NUM_JOINTS * 4, 0, 3 * TotalModel::NUM_JOINTS, TotalModel::NUM_SHAPE_COEFFICIENTS);
    Map<MatrixXdr> dTJdc(dTrdc.data() + 3 * TotalModel::NUM_JOINTS * 4 * TotalModel::NUM_SHAPE_COEFFICIENTS, 3 * TotalModel::NUM_JOINTS, TotalModel::NUM_SHAPE_COEFFICIENTS);
    VectorXd outJoint = transforms_joint.block(3 * TotalModel::NUM_JOINTS * 4, 0, 3 * TotalModel::NUM_JOINTS, 1);  // outJoint
    for (auto i = 0u; i < TotalModel::NUM_JOINTS; i++) outJoint.block(3 * i, 0, 3, 1) += t_vec;
// const auto duration_transJ = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now() - start_transJ).count();

    MatrixXdr outVert(total_vertex.size(), 3);
    Map<MatrixXdr> dVdP(dVdP_data, 3 * total_vertex.size(), TotalModel::NUM_POSE_PARAMETERS);
    Map<MatrixXdr> dVdc(dVdc_data, 3 * total_vertex.size(), TotalModel::NUM_SHAPE_COEFFICIENTS);
    Map<MatrixXdr> dVdfc(dVdfc_data, 3 * total_vertex.size(), TotalModel::NUM_EXP_BASIS_COEFFICIENTS);
// const auto start_LBS = std::chrono::high_resolution_clock::now();
    if (jacobians) select_lbs(c, transforms_joint, dTrdP, dTrdc, outVert, dVdP_data, dVdc_data, face_coeff, dVdfc_data);
    else select_lbs(c, transforms_joint, outVert, face_coeff);
// const auto duration_LBS = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now() - start_LBS).count();
    outVert.rowwise() += t_vec.transpose();
    std::array<double*, 3> out_data{{ outJoint.data(), outVert.data(), nullptr }};
    std::array<Map<MatrixXdr>*, 3> dodP = {{ &dTJdP, &dVdP, nullptr }};  // array of reference is not allowed, only array of pointer
    std::array<Map<MatrixXdr>*, 3> dodc = {{ &dTJdc, &dVdc, nullptr }};
    Map<MatrixXdr> dTJdfc(dTJdfc_data, 3 * TotalModel::NUM_JOINTS, TotalModel::NUM_EXP_BASIS_COEFFICIENTS); 
    std::array<Map<MatrixXdr>*, 3> dodfc = {{ &dTJdfc, &dVdfc, nullptr }};

    // 2nd step: compute the target joints (copy from FK)
// const auto start_target = std::chrono::high_resolution_clock::now();
    // Arrange the Output Joints & Vertex to the order of constraints
    VectorXd tempJoints(3 * (m_nCorrespond_adam2joints + m_nCorrespond_adam2pts));
    auto* tempJointsPtr = tempJoints.data();
    Map<MatrixXdr> dOdP(dOdP_data, 3 * (m_nCorrespond_adam2joints + m_nCorrespond_adam2pts), TotalModel::NUM_POSE_PARAMETERS);
    Map<MatrixXdr> dOdc(dOdc_data, 3 * (m_nCorrespond_adam2joints + m_nCorrespond_adam2pts), TotalModel::NUM_SHAPE_COEFFICIENTS);
    Map<MatrixXdr> dOdfc(dOdfc_data, 3 * (m_nCorrespond_adam2joints + m_nCorrespond_adam2pts), TotalModel::NUM_EXP_BASIS_COEFFICIENTS);
    if (regressor_type == 0)
    {
        for (int i = 0; i < fit_data_.adam.m_indices_jointConst_adamIdx.rows(); i++)
        {
            tempJoints.block(3 * i, 0, 3, 1) = outJoint.block(3 * fit_data_.adam.m_indices_jointConst_adamIdx(i), 0, 3, 1);
        }
        int offset = fit_data_.adam.m_indices_jointConst_adamIdx.rows();
        for (int i = 0; i < fit_data_.adam.m_correspond_adam2lHand_adamIdx.rows(); i++)
        {
            tempJoints.block(3*(i + offset), 0, 3, 1) = outJoint.block(3 * fit_data_.adam.m_correspond_adam2lHand_adamIdx(i), 0, 3, 1);
        }
        offset += fit_data_.adam.m_correspond_adam2lHand_adamIdx.rows();
        for (int i = 0; i < fit_data_.adam.m_correspond_adam2rHand_adamIdx.rows(); i++)
        {
            tempJoints.block(3*(i + offset), 0, 3, 1) = outJoint.block(3 * fit_data_.adam.m_correspond_adam2rHand_adamIdx(i), 0, 3, 1);
        }
        offset += fit_data_.adam.m_correspond_adam2rHand_adamIdx.rows();
        for (int i = 0; i < corres_vertex2targetpt.size(); i++)
        {
            tempJoints.block(3*(i + offset), 0, 3, 1) = outVert.row(i).transpose();
        }

        if (jacobians)
        {
            int offset = 0;
            for (int i = 0; i < fit_data_.adam.m_indices_jointConst_adamIdx.rows(); i++)
            {
                std::copy(dTJdP.data() + 3 * fit_data_.adam.m_indices_jointConst_adamIdx(i) * TotalModel::NUM_POSE_PARAMETERS,
                          dTJdP.data() + 3 * (fit_data_.adam.m_indices_jointConst_adamIdx(i) + 1) * TotalModel::NUM_POSE_PARAMETERS,
                          dOdP.data() + 3 * (i + offset) * TotalModel::NUM_POSE_PARAMETERS);
                std::copy(dTJdc.data() + 3 * fit_data_.adam.m_indices_jointConst_adamIdx(i) * TotalModel::NUM_SHAPE_COEFFICIENTS,
                          dTJdc.data() + 3 * (fit_data_.adam.m_indices_jointConst_adamIdx(i) + 1) * TotalModel::NUM_SHAPE_COEFFICIENTS,
                          dOdc.data() + 3 * (i + offset) * TotalModel::NUM_SHAPE_COEFFICIENTS);
            }
            offset += fit_data_.adam.m_indices_jointConst_adamIdx.rows();
            for (int i = 0; i < fit_data_.adam.m_correspond_adam2lHand_adamIdx.rows(); i++)
            {
                std::copy(dTJdP.data() + 3 * fit_data_.adam.m_correspond_adam2lHand_adamIdx(i) * TotalModel::NUM_POSE_PARAMETERS,
                          dTJdP.data() + 3 * (fit_data_.adam.m_correspond_adam2lHand_adamIdx(i) + 1) * TotalModel::NUM_POSE_PARAMETERS,
                          dOdP.data() + 3 * (i + offset) * TotalModel::NUM_POSE_PARAMETERS);
                std::copy(dTJdc.data() + 3 * fit_data_.adam.m_correspond_adam2lHand_adamIdx(i) * TotalModel::NUM_SHAPE_COEFFICIENTS,
                          dTJdc.data() + 3 * (fit_data_.adam.m_correspond_adam2lHand_adamIdx(i) + 1) * TotalModel::NUM_SHAPE_COEFFICIENTS,
                          dOdc.data() + 3 * (i + offset) * TotalModel::NUM_SHAPE_COEFFICIENTS);
            }
            offset += fit_data_.adam.m_correspond_adam2lHand_adamIdx.rows();
            for (int i = 0; i < fit_data_.adam.m_correspond_adam2rHand_adamIdx.rows(); i++)
            {
                std::copy(dTJdP.data() + 3 * fit_data_.adam.m_correspond_adam2rHand_adamIdx(i) * TotalModel::NUM_POSE_PARAMETERS,
                          dTJdP.data() + 3 * (fit_data_.adam.m_correspond_adam2rHand_adamIdx(i) + 1) * TotalModel::NUM_POSE_PARAMETERS,
                          dOdP.data() + 3 * (i + offset) * TotalModel::NUM_POSE_PARAMETERS);
                std::copy(dTJdc.data() + 3 * fit_data_.adam.m_correspond_adam2rHand_adamIdx(i) * TotalModel::NUM_SHAPE_COEFFICIENTS,
                          dTJdc.data() + 3 * (fit_data_.adam.m_correspond_adam2rHand_adamIdx(i) + 1) * TotalModel::NUM_SHAPE_COEFFICIENTS,
                          dOdc.data() + 3 * (i + offset) * TotalModel::NUM_SHAPE_COEFFICIENTS);
            }
            offset += fit_data_.adam.m_correspond_adam2rHand_adamIdx.rows();
            std::copy(dVdP_data, dVdP_data + 3 * corres_vertex2targetpt.size() * TotalModel::NUM_POSE_PARAMETERS,
                      dOdP.data() + 3 * offset * TotalModel::NUM_POSE_PARAMETERS);
            std::copy(dVdc_data, dVdc_data + 3 * corres_vertex2targetpt.size() * TotalModel::NUM_SHAPE_COEFFICIENTS,
                      dOdc.data() + 3 * offset * TotalModel::NUM_SHAPE_COEFFICIENTS);

            if (fit_face_exp)
            {
                std::fill(dOdfc_data, dOdfc_data + 3 * m_nCorrespond_adam2joints * TotalModel::NUM_EXP_BASIS_COEFFICIENTS, 0.0);
                std::copy(dVdfc_data, dVdfc_data + 3 * m_nCorrespond_adam2pts * TotalModel::NUM_EXP_BASIS_COEFFICIENTS,
                          dOdfc_data + 3 * m_nCorrespond_adam2joints * TotalModel::NUM_EXP_BASIS_COEFFICIENTS);
            }
        }
    }
    else if (regressor_type == 1) // use Human 3.6M regressor
    {
        if(jacobians) SparseRegress(fit_data_.adam.m_cocoplus_reg, outVert.data(), dVdP_data, dVdc_data, dVdfc_data, tempJoints.data(), dOdP.data(), dOdc.data(), dOdfc.data());
        else SparseRegress(fit_data_.adam.m_cocoplus_reg, outVert.data(), nullptr, nullptr, nullptr, tempJoints.data(), nullptr, nullptr, nullptr);
        if (fit_data_.fit_surface)
        {
            for (auto i = 0; i < corres_vertex2targetpt.size(); i++)  // surface (vertex) constraints
            {
                tempJoints[(m_nCorrespond_adam2joints + i) * 3 + 0] = outVert(corres_vertex2targetpt[i].first, 0);
                tempJoints[(m_nCorrespond_adam2joints + i) * 3 + 1] = outVert(corres_vertex2targetpt[i].first, 1);
                tempJoints[(m_nCorrespond_adam2joints + i) * 3 + 2] = outVert(corres_vertex2targetpt[i].first, 2);
            }
        }
        out_data[2] = tempJoints.data();
        if (jacobians)
        {
            dodP[2] = &dOdP;
            dodc[2] = &dOdc;
        }
    }
    else
    {
        assert(regressor_type == 2); // use COCO regressor
        Map< const Matrix<double, Dynamic, Dynamic, RowMajor> > pose_param(p_eulers, TotalModel::NUM_JOINTS, 3);
        if(jacobians) SparseRegress(fit_data_.adam.m_small_coco_reg, outVert.data(), dVdP_data, dVdc_data, dVdfc_data, tempJoints.data(), dOdP.data(), dOdc.data(), dOdfc.data());
        else SparseRegress(fit_data_.adam.m_small_coco_reg, outVert.data(), nullptr, nullptr, nullptr, tempJoints.data(), nullptr, nullptr, nullptr);
        // SparseRegressor only set the data for body & face, we need to copy finger data from FK output
        std::copy(outJoint.data() + 3 * 22, outJoint.data() + 3 * 62,  tempJoints.data() + 3 * fit_data_.adam.coco_jointConst_smcIdx.size()); // 22-42 are left hand, 42 - 62 are right hand
        // copy foot & face vertex
        for (auto i = 0; i < corres_vertex2targetpt.size(); i++)
        {
            tempJoints[(m_nCorrespond_adam2joints + i) * 3 + 0] = outVert(corres_vertex2targetpt[i].first, 0);
            tempJoints[(m_nCorrespond_adam2joints + i) * 3 + 1] = outVert(corres_vertex2targetpt[i].first, 1);
            tempJoints[(m_nCorrespond_adam2joints + i) * 3 + 2] = outVert(corres_vertex2targetpt[i].first, 2);
        }
        out_data[2] = tempJoints.data();
        if (jacobians)
        {
            std::copy(dTJdP.data() + 3 * 22 * TotalModel::NUM_POSE_PARAMETERS, dTJdP.data() + 3 * 62 * TotalModel::NUM_POSE_PARAMETERS,
                      dOdP_data + 3 * fit_data_.adam.coco_jointConst_smcIdx.size() * TotalModel::NUM_POSE_PARAMETERS);
            std::copy(dTJdc.data() + 3 * 22 * TotalModel::NUM_SHAPE_COEFFICIENTS, dTJdc.data() + 3 * 62 * TotalModel::NUM_SHAPE_COEFFICIENTS,
                      dOdc_data + 3 * fit_data_.adam.coco_jointConst_smcIdx.size() * TotalModel::NUM_SHAPE_COEFFICIENTS);
            for (auto i = 0; i < corres_vertex2targetpt.size(); i++)
            {
                std::copy(dVdP_data + (corres_vertex2targetpt[i].first) * 3 * TotalModel::NUM_POSE_PARAMETERS,
                          dVdP_data + (corres_vertex2targetpt[i].first + 1) * 3 * TotalModel::NUM_POSE_PARAMETERS,
                          dOdP_data + (m_nCorrespond_adam2joints + i) * 3 * TotalModel::NUM_POSE_PARAMETERS);
                std::copy(dVdc_data + (corres_vertex2targetpt[i].first) * 3 * TotalModel::NUM_SHAPE_COEFFICIENTS,
                          dVdc_data + (corres_vertex2targetpt[i].first + 1) * 3 * TotalModel::NUM_SHAPE_COEFFICIENTS,
                          dOdc_data + (m_nCorrespond_adam2joints + i) * 3 * TotalModel::NUM_SHAPE_COEFFICIENTS);
            }
            dodP[2] = &dOdP;
            dodc[2] = &dOdc;

            if (fit_face_exp)
            {
                // only consider face70 for face exp, w/o COCO face
                std::fill(dOdfc_data, dOdfc_data + m_nCorrespond_adam2joints * 3 * TotalModel::NUM_EXP_BASIS_COEFFICIENTS, 0.0);
                for (auto i = 0; i < corres_vertex2targetpt.size(); i++)
                {
                    std::copy(dVdfc_data + (corres_vertex2targetpt[i].first) * 3 * TotalModel::NUM_EXP_BASIS_COEFFICIENTS,
                              dVdfc_data + (corres_vertex2targetpt[i].first + 1) * 3 * TotalModel::NUM_EXP_BASIS_COEFFICIENTS,
                              dOdfc_data + (m_nCorrespond_adam2joints + i) * 3 * TotalModel::NUM_EXP_BASIS_COEFFICIENTS);
                }
                dodfc[2] = &dOdfc;
            }
        }
    }
// const auto duration_target = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now() - start_target).count();

    // 3rd step: set residuals
    Map< VectorXd > res(residuals, m_nResiduals);
    const auto* targetPts = m_targetPts.data();
    const auto* targetPtsWeight = m_targetPts_weight.data();
// const auto start_res = std::chrono::high_resolution_clock::now();
    if (fit_data_.fit3D)  // put constrains on 3D
    {
        for(int i = 0; i < m_nCorrespond_adam2joints + m_nCorrespond_adam2pts; i++)
        {
            if (targetPts[5 * i] == 0 && targetPts[5 * i + 1] == 0 && targetPts[5 * i + 2] == 0) res.block(res_dim * i, 0, 3, 1).setZero();
            else res.block(res_dim * i, 0, 3, 1) = m_targetPts_weight.block(5 * i, 0, 3, 1).array() * (tempJoints.block(3 * i, 0, 3, 1) - m_targetPts.block(5 * i, 0, 3, 1)).array();
        }
    }

    if (fit_data_.fit2D)
    {
        Eigen::Map< Matrix<double, Dynamic, 3, RowMajor> > jointArray(tempJoints.data(), m_nCorrespond_adam2joints + m_nCorrespond_adam2pts, 3);
        // const Eigen::Map< const Matrix<double, 3, 3, RowMajor> > K(fit_data_.K);
        const Eigen::Map< const Matrix<double, 3, 3> > K(fit_data_.K);
        const MatrixXdr jointProjection = jointArray * K;
        const auto* JP = jointProjection.data();
        for(int i = 0; i < m_nCorrespond_adam2joints + m_nCorrespond_adam2pts; i++)
        {
            if (targetPts[5 * i + 3] == 0 && targetPts[5 * i + 4] == 0) res.block(res_dim * i + start_2d_dim, 0, 2, 1).setZero();
            else
            {
                // the following two lines are equivalent to
                // residuals[res_dim * i + start_2d_dim + 0] = (jointProjection(i, 0) / jointProjection(i, 2) - m_targetPts(5 * i + 3)) * m_targetPts_weight[res_dim * i + start_2d_dim + 0];
                // residuals[res_dim * i + start_2d_dim + 1] = (jointProjection(i, 1) / jointProjection(i, 2) - m_targetPts(5 * i + 4)) * m_targetPts_weight[res_dim * i + start_2d_dim + 1];
                residuals[res_dim * i + start_2d_dim + 0] = (JP[3 * i + 0] / JP[3 * i + 2] - targetPts[5 * i + 3]) * m_targetPts_weight[5 * i + 3];
                residuals[res_dim * i + start_2d_dim + 1] = (JP[3 * i + 1] / JP[3 * i + 2] - targetPts[5 * i + 4]) * m_targetPts_weight[5 * i + 4];
            }
        }
    }

    if (fit_data_.fitPAF)
    {
        const int offset = start_PAF;
        for (auto i = 0; i < num_PAF_constraint; i++)
        {
            if (fit_data_.PAF.col(i).isZero(0))
            {
                residuals[offset + 3 * i + 0] = residuals[offset + 3 * i + 1] = residuals[offset + 3 * i + 2] = 0.0;
                continue;
            }
            const std::array<double, 3> AB{{
                out_data[PAF_connection[i][2]][3 * PAF_connection[i][3] + 0] - out_data[PAF_connection[i][0]][3 * PAF_connection[i][1] + 0], 
                out_data[PAF_connection[i][2]][3 * PAF_connection[i][3] + 1] - out_data[PAF_connection[i][0]][3 * PAF_connection[i][1] + 1], 
                out_data[PAF_connection[i][2]][3 * PAF_connection[i][3] + 2] - out_data[PAF_connection[i][0]][3 * PAF_connection[i][1] + 2], 
            }};
            const auto length = sqrt(AB[0] * AB[0] + AB[1] * AB[1] + AB[2] * AB[2]);
            residuals[offset + 3 * i + 0] = (AB[0] / length - fit_data_.PAF(0, i)) * PAF_weight[i];
            residuals[offset + 3 * i + 1] = (AB[1] / length - fit_data_.PAF(1, i)) * PAF_weight[i];
            residuals[offset + 3 * i + 2] = (AB[2] / length - fit_data_.PAF(2, i)) * PAF_weight[i];
        }
    }

    int offset_inner = start_inner;
    if (fit_data_.inner_weight[0] > 0)
    {
        // the first defined inner constraints: should not bend (adam joint 6 should be close to the middle of central hip and neck)
        residuals[offset_inner + 0] = (outJoint.data()[3 * 6 + 0] - 0.5 * (outJoint.data()[3 * 0 + 0] + outJoint.data()[3 * 12 + 0])) * fit_data_.inner_weight[0];
        residuals[offset_inner + 1] = (outJoint.data()[3 * 6 + 1] - 0.5 * (outJoint.data()[3 * 0 + 1] + outJoint.data()[3 * 12 + 1])) * fit_data_.inner_weight[0];
        residuals[offset_inner + 2] = (outJoint.data()[3 * 6 + 2] - 0.5 * (outJoint.data()[3 * 0 + 2] + outJoint.data()[3 * 12 + 2])) * fit_data_.inner_weight[0];
    }
    else
    {
        residuals[offset_inner + 0] = residuals[offset_inner + 1] = residuals[offset_inner + 2] = 0.0;
    }
// const auto duration_res = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now() - start_res).count();

    // 4th step: set jacobians
// const auto start_jacob = std::chrono::high_resolution_clock::now();
    if (jacobians)
    {
        if (jacobians[0])
        {
            Map< Matrix<double, Dynamic, Dynamic, RowMajor> > drdt(jacobians[0], m_nResiduals, 3);

            if (fit_data_.fit3D)
            {
                for (int i = 0; i < m_nCorrespond_adam2joints + m_nCorrespond_adam2pts; i++)
                {
                    if (targetPts[5 * i] == 0 && targetPts[5 * i + 1] == 0 && targetPts[5 * i + 2] == 0) drdt.block(res_dim * i, 0, 3, 3).setZero();
                    else drdt.block(res_dim * i, 0, 3, 3) = m_targetPts_weight.block(5 * i, 0, 3, 1).asDiagonal() * Matrix<double, 3, 3>::Identity();
                }
            }

            if (fit_data_.fit2D)
            {
                Eigen::Map< Matrix<double, Dynamic, 3, RowMajor> > jointArray(tempJoints.data(), m_nCorrespond_adam2joints + m_nCorrespond_adam2pts, 3);
                Matrix<double, Dynamic, Dynamic, RowMajor> dJdt(3, 3);
                dJdt.setIdentity();
                for (int i = 0; i < m_nCorrespond_adam2joints + m_nCorrespond_adam2pts; i++)
                {
                    if (targetPts[5 * i + 3] == 0 && targetPts[5 * i + 4] == 0) drdt.block(res_dim * i + start_2d_dim, 0, 2, 3).setZero();
                    else
                    {
                        projection_Derivative(drdt.data(), dJdt.data(), drdt.cols(), (double*)(jointArray.data() + 3 * i), fit_data_.K, res_dim * i + start_2d_dim, 0, 1.0);
                        drdt.block(res_dim * i + start_2d_dim, 0, 2, 3) = m_targetPts_weight.block(5 * i + 3, 0, 2, 1).asDiagonal() * drdt.block(res_dim * i + start_2d_dim, 0, 2, 3);
                    }
                }
            }

            if (fit_data_.fitPAF)
            {
                // drdt.block(start_PAF, 0, 3 * num_PAF_constraint, 3).setZero();
                std::fill(jacobians[0] + 3 * start_PAF, jacobians[0] + 3 * start_PAF + 9 * num_PAF_constraint, 0);
            }

            // inner constraint 1
            offset_inner = start_inner;
            drdt.block(offset_inner, 0, inner_dim[0], 3).setZero();
        }

        if (jacobians[1])
        {
            Map< Matrix<double, Dynamic, Dynamic, RowMajor> > dr_dPose(jacobians[1], m_nResiduals, TotalModel::NUM_JOINTS * 3); 
            if (fit_data_.fit3D)
            {
                for (int i = 0; i < m_nCorrespond_adam2joints + m_nCorrespond_adam2pts; i++)
                {
                    if (targetPts[5 * i] == 0 && targetPts[5 * i + 1] == 0 && targetPts[5 * i + 2] == 0)
                    {
                        std::fill(dr_dPose.data() + res_dim * i * TotalModel::NUM_POSE_PARAMETERS,
                                  dr_dPose.data() + (3 + res_dim * i) * TotalModel::NUM_POSE_PARAMETERS, 0);
                        // dr_dPose.block(res_dim * (i + offset), 0, 3, TotalModel::NUM_POSE_PARAMETERS).setZero();
                    }
                    else dr_dPose.block(res_dim * i, 0, 3, TotalModel::NUM_POSE_PARAMETERS) = m_targetPts_weight.block(5 * i, 0, 3, 1).asDiagonal() *
                        dOdP.block(3 * i, 0, 3, TotalModel::NUM_POSE_PARAMETERS);
                }
            }

            if (fit_data_.fit2D)
            {
                Eigen::Map< Matrix<double, Dynamic, 3, RowMajor> > jointArray(tempJoints.data(), m_nCorrespond_adam2joints + m_nCorrespond_adam2pts, 3);
                for (int i = 0; i < m_nCorrespond_adam2joints + m_nCorrespond_adam2pts; i++)
                {
                    if (targetPts[5 * i + 3] == 0 && targetPts[5 * i + 4] == 0)
                    {
                        std::fill(dr_dPose.data() + (start_2d_dim + res_dim * i) * TotalModel::NUM_POSE_PARAMETERS,
                                  dr_dPose.data() + (2 + start_2d_dim + res_dim * i) * TotalModel::NUM_POSE_PARAMETERS, 0);
                        // dr_dPose.block(res_dim * (i + offset), 0, 3, TotalModel::NUM_POSE_PARAMETERS).setZero();
                    }
                    else
                    {
                        projection_Derivative(dr_dPose.data(), dOdP.data(), dr_dPose.cols(), (double*)(jointArray.data() + 3 * i), fit_data_.K,
                                               res_dim * i + start_2d_dim, 3 * i, 1.);
                        dr_dPose.block(res_dim * i + start_2d_dim, 0, 2, TotalModel::NUM_POSE_PARAMETERS)
                            = m_targetPts_weight.block(5 * i + 3, 0, 2, 1).asDiagonal() * dr_dPose.block(res_dim * i + start_2d_dim, 0, 2, TotalModel::NUM_POSE_PARAMETERS);
                    }
                }
            }

            if (fit_data_.fitPAF)
            {
                const int offset = start_PAF;
                for (auto i = 0; i < num_PAF_constraint; i++)
                {
                    if (fit_data_.PAF.col(i).isZero(0))
                    {
                        // dr_dPose.block(offset + 3 * i, 0, 3, TotalModel::NUM_POSE_PARAMETERS).setZero();
                        std::fill(dr_dPose.data() + (offset + 3 * i) * TotalModel::NUM_POSE_PARAMETERS,
                                  dr_dPose.data() + (offset + 3 * i + 3) * TotalModel::NUM_POSE_PARAMETERS, 0);
                        continue;
                    }
                    const std::array<double, 3> AB{{
                        out_data[PAF_connection[i][2]][3 * PAF_connection[i][3] + 0] - out_data[PAF_connection[i][0]][3 * PAF_connection[i][1] + 0], 
                        out_data[PAF_connection[i][2]][3 * PAF_connection[i][3] + 1] - out_data[PAF_connection[i][0]][3 * PAF_connection[i][1] + 1], 
                        out_data[PAF_connection[i][2]][3 * PAF_connection[i][3] + 2] - out_data[PAF_connection[i][0]][3 * PAF_connection[i][1] + 2], 
                    }};
                    const auto length2 = AB[0] * AB[0] + AB[1] * AB[1] + AB[2] * AB[2];
                    const auto length = sqrt(length2);
                    const Eigen::Map< const Matrix<double, 3, 1> > AB_vec(AB.data());
                    const Eigen::Matrix<double, 3, 3, RowMajor> dudJ = Eigen::Matrix<double, 3, 3>::Identity() / length - AB_vec * AB_vec.transpose() / length2 / length;
                    if (PAF_connection[i][0] == 0 && PAF_connection[i][2] == 0)
                    {
                        std::fill(dr_dPose.data() + (offset + 3 * i) * TotalModel::NUM_POSE_PARAMETERS, dr_dPose.data() + (offset + 3 * i + 3) * TotalModel::NUM_POSE_PARAMETERS, 0);
                        const double* dudJ_data = dudJ.data();
                        double* drdp_row0 = dr_dPose.data() + (offset + 3 * i) * TotalModel::NUM_POSE_PARAMETERS;
                        double* drdp_row1 = dr_dPose.data() + (offset + 3 * i + 1) * TotalModel::NUM_POSE_PARAMETERS;
                        double* drdp_row2 = dr_dPose.data() + (offset + 3 * i + 2) * TotalModel::NUM_POSE_PARAMETERS;
                        {
                            const double* dJdP_row0 = dTJdP.data() + (3 * PAF_connection[i][3]) * TotalModel::NUM_POSE_PARAMETERS;
                            const double* dJdP_row1 = dTJdP.data() + (3 * PAF_connection[i][3] + 1) * TotalModel::NUM_POSE_PARAMETERS;
                            const double* dJdP_row2 = dTJdP.data() + (3 * PAF_connection[i][3] + 2) * TotalModel::NUM_POSE_PARAMETERS;
                            for(auto& ipar: parentIndexes[PAF_connection[i][3]])
                            {
                                drdp_row0[3 * ipar + 0] += PAF_weight[i] * (dudJ_data[0] * dJdP_row0[3 * ipar + 0] + dudJ_data[1] * dJdP_row1[3 * ipar + 0] + dudJ_data[2] * dJdP_row2[3 * ipar + 0]);
                                drdp_row0[3 * ipar + 1] += PAF_weight[i] * (dudJ_data[0] * dJdP_row0[3 * ipar + 1] + dudJ_data[1] * dJdP_row1[3 * ipar + 1] + dudJ_data[2] * dJdP_row2[3 * ipar + 1]);
                                drdp_row0[3 * ipar + 2] += PAF_weight[i] * (dudJ_data[0] * dJdP_row0[3 * ipar + 2] + dudJ_data[1] * dJdP_row1[3 * ipar + 2] + dudJ_data[2] * dJdP_row2[3 * ipar + 2]);
                                drdp_row1[3 * ipar + 0] += PAF_weight[i] * (dudJ_data[3] * dJdP_row0[3 * ipar + 0] + dudJ_data[4] * dJdP_row1[3 * ipar + 0] + dudJ_data[5] * dJdP_row2[3 * ipar + 0]);
                                drdp_row1[3 * ipar + 1] += PAF_weight[i] * (dudJ_data[3] * dJdP_row0[3 * ipar + 1] + dudJ_data[4] * dJdP_row1[3 * ipar + 1] + dudJ_data[5] * dJdP_row2[3 * ipar + 1]);
                                drdp_row1[3 * ipar + 2] += PAF_weight[i] * (dudJ_data[3] * dJdP_row0[3 * ipar + 2] + dudJ_data[4] * dJdP_row1[3 * ipar + 2] + dudJ_data[5] * dJdP_row2[3 * ipar + 2]);
                                drdp_row2[3 * ipar + 0] += PAF_weight[i] * (dudJ_data[6] * dJdP_row0[3 * ipar + 0] + dudJ_data[7] * dJdP_row1[3 * ipar + 0] + dudJ_data[8] * dJdP_row2[3 * ipar + 0]);
                                drdp_row2[3 * ipar + 1] += PAF_weight[i] * (dudJ_data[6] * dJdP_row0[3 * ipar + 1] + dudJ_data[7] * dJdP_row1[3 * ipar + 1] + dudJ_data[8] * dJdP_row2[3 * ipar + 1]);
                                drdp_row2[3 * ipar + 2] += PAF_weight[i] * (dudJ_data[6] * dJdP_row0[3 * ipar + 2] + dudJ_data[7] * dJdP_row1[3 * ipar + 2] + dudJ_data[8] * dJdP_row2[3 * ipar + 2]);
                            }
                        }
                        {
                            const double* dJdP_row0 = dTJdP.data() + (3 * PAF_connection[i][1]) * TotalModel::NUM_POSE_PARAMETERS;
                            const double* dJdP_row1 = dTJdP.data() + (3 * PAF_connection[i][1] + 1) * TotalModel::NUM_POSE_PARAMETERS;
                            const double* dJdP_row2 = dTJdP.data() + (3 * PAF_connection[i][1] + 2) * TotalModel::NUM_POSE_PARAMETERS;
                            for(auto& ipar: parentIndexes[PAF_connection[i][1]])
                            {
                                drdp_row0[3 * ipar + 0] -= PAF_weight[i] * (dudJ_data[0] * dJdP_row0[3 * ipar + 0] + dudJ_data[1] * dJdP_row1[3 * ipar + 0] + dudJ_data[2] * dJdP_row2[3 * ipar + 0]);
                                drdp_row0[3 * ipar + 1] -= PAF_weight[i] * (dudJ_data[0] * dJdP_row0[3 * ipar + 1] + dudJ_data[1] * dJdP_row1[3 * ipar + 1] + dudJ_data[2] * dJdP_row2[3 * ipar + 1]);
                                drdp_row0[3 * ipar + 2] -= PAF_weight[i] * (dudJ_data[0] * dJdP_row0[3 * ipar + 2] + dudJ_data[1] * dJdP_row1[3 * ipar + 2] + dudJ_data[2] * dJdP_row2[3 * ipar + 2]);
                                drdp_row1[3 * ipar + 0] -= PAF_weight[i] * (dudJ_data[3] * dJdP_row0[3 * ipar + 0] + dudJ_data[4] * dJdP_row1[3 * ipar + 0] + dudJ_data[5] * dJdP_row2[3 * ipar + 0]);
                                drdp_row1[3 * ipar + 1] -= PAF_weight[i] * (dudJ_data[3] * dJdP_row0[3 * ipar + 1] + dudJ_data[4] * dJdP_row1[3 * ipar + 1] + dudJ_data[5] * dJdP_row2[3 * ipar + 1]);
                                drdp_row1[3 * ipar + 2] -= PAF_weight[i] * (dudJ_data[3] * dJdP_row0[3 * ipar + 2] + dudJ_data[4] * dJdP_row1[3 * ipar + 2] + dudJ_data[5] * dJdP_row2[3 * ipar + 2]);
                                drdp_row2[3 * ipar + 0] -= PAF_weight[i] * (dudJ_data[6] * dJdP_row0[3 * ipar + 0] + dudJ_data[7] * dJdP_row1[3 * ipar + 0] + dudJ_data[8] * dJdP_row2[3 * ipar + 0]);
                                drdp_row2[3 * ipar + 1] -= PAF_weight[i] * (dudJ_data[6] * dJdP_row0[3 * ipar + 1] + dudJ_data[7] * dJdP_row1[3 * ipar + 1] + dudJ_data[8] * dJdP_row2[3 * ipar + 1]);
                                drdp_row2[3 * ipar + 2] -= PAF_weight[i] * (dudJ_data[6] * dJdP_row0[3 * ipar + 2] + dudJ_data[7] * dJdP_row1[3 * ipar + 2] + dudJ_data[8] * dJdP_row2[3 * ipar + 2]);
                            }
                        }
                        // for(auto& ipar: parentIndexes[PAF_connection[i][3]])
                        //     dr_dPose.block(offset + 3 * i, 3 * ipar, 3, 3) += PAF_weight[i] * dudJ * dodP[PAF_connection[i][2]]->block(3 * PAF_connection[i][3], 3 * ipar, 3, 3);
                        // for(auto& ipar: parentIndexes[PAF_connection[i][1]])
                        //     dr_dPose.block(offset + 3 * i, 3 * ipar, 3, 3) -= PAF_weight[i] * dudJ * dodP[PAF_connection[i][0]]->block(3 * PAF_connection[i][1], 3 * ipar, 3, 3);
                    }
                    else
                    {
                        // slow
                        dr_dPose.block(offset + 3 * i, 0, 3, TotalModel::NUM_POSE_PARAMETERS) = PAF_weight[i] * dudJ * 
                            ( dodP[PAF_connection[i][2]]->block(3 * PAF_connection[i][3], 0, 3, TotalModel::NUM_POSE_PARAMETERS) -
                            dodP[PAF_connection[i][0]]->block(3 * PAF_connection[i][1], 0, 3, TotalModel::NUM_POSE_PARAMETERS) );
                    }
                }
            }

            offset_inner = start_inner;
            if (fit_data_.inner_weight[0] > 0)
            {
                // the first defined inner constraints: should not bend (adam joint 6 should be close to the middle of central hip and neck)
                dr_dPose.block(offset_inner, 0, inner_dim[0], TotalModel::NUM_POSE_PARAMETERS) =
                    fit_data_.inner_weight[0] * (dTJdP.block(6 * 3, 0, 3, TotalModel::NUM_POSE_PARAMETERS) -
                    0.5 * (dTJdP.block(0 * 3, 0, 3, TotalModel::NUM_POSE_PARAMETERS) + dTJdP.block(12 * 3, 0, 3, TotalModel::NUM_POSE_PARAMETERS)));
            }
            else
                dr_dPose.block(offset_inner, 0, inner_dim[0], TotalModel::NUM_POSE_PARAMETERS).setZero();

            if (rigid_body)
                dr_dPose.block(0, 3, m_nResiduals, TotalModel::NUM_POSE_PARAMETERS - 3).setZero();
        }

        if (rigid_body)
        {
            if (jacobians[2])
                std::fill(jacobians[2], jacobians[2] + m_nResiduals * TotalModel::NUM_SHAPE_COEFFICIENTS, 0);
            if (fit_face_exp && jacobians[3])
                std::fill(jacobians[3], jacobians[3] + m_nResiduals * TotalModel::NUM_EXP_BASIS_COEFFICIENTS, 0);
            return true;
        }

        if (jacobians[2])
        {
            Map< Matrix<double, Dynamic, Dynamic, RowMajor> > dr_dCoeff(jacobians[2], m_nResiduals, TotalModel::NUM_SHAPE_COEFFICIENTS);    

            if (fit_data_.fit3D)
            {
                for (int i = 0; i < m_nCorrespond_adam2joints + m_nCorrespond_adam2pts; i++)
                {
                    if (targetPts[5 * i] == 0 && targetPts[5 * i + 1] == 0 && targetPts[5 * i + 2] == 0)
                    {
                        std::fill(dr_dCoeff.data() + res_dim * i * TotalModel::NUM_SHAPE_COEFFICIENTS,
                                  dr_dCoeff.data() + (3 + res_dim * i) * TotalModel::NUM_SHAPE_COEFFICIENTS, 0);
                    }
                    else dr_dCoeff.block(res_dim * i, 0, 3, TotalModel::NUM_SHAPE_COEFFICIENTS) = m_targetPts_weight.block(5 * i, 0, 3, 1).asDiagonal() *
                        dOdc.block(3 * i, 0, 3, TotalModel::NUM_SHAPE_COEFFICIENTS);
                }
            }

            if (fit_data_.fit2D)
            {
                Eigen::Map< Matrix<double, Dynamic, 3, RowMajor> > jointArray(tempJoints.data(), m_nCorrespond_adam2joints + m_nCorrespond_adam2pts, 3);
                for (int i = 0; i < m_nCorrespond_adam2joints + m_nCorrespond_adam2pts; i++)
                {
                    if (targetPts[5 * i + 3] == 0 && targetPts[5 * i + 4] == 0)
                    {
                        std::fill(dr_dCoeff.data() + (start_2d_dim + res_dim * i) * TotalModel::NUM_SHAPE_COEFFICIENTS,
                                  dr_dCoeff.data() + (2 + start_2d_dim + res_dim * i) * TotalModel::NUM_SHAPE_COEFFICIENTS, 0);
                    }
                    else
                    {
                        projection_Derivative(dr_dCoeff.data(), dOdc.data(), dr_dCoeff.cols(), (double*)(jointArray.data() + 3 * i), fit_data_.K,
                                               res_dim * i + start_2d_dim, 3 * i, 1.);
                        dr_dCoeff.block(res_dim * i + start_2d_dim, 0, 2, TotalModel::NUM_SHAPE_COEFFICIENTS) = 
                            m_targetPts_weight.block(5 * i + 3, 0, 2, 1).asDiagonal() * dr_dCoeff.block(res_dim * i + start_2d_dim, 0, 2, TotalModel::NUM_SHAPE_COEFFICIENTS);
                    }
                }
            }

            if (fit_data_.fitPAF)
            {
                const int offset = start_PAF;
                for (auto i = 0; i < num_PAF_constraint; i++)
                {
                    if (fit_data_.PAF.col(i).isZero(0))
                    {
                        std::fill(dr_dCoeff.data() + (offset + 3 * i) * TotalModel::NUM_SHAPE_COEFFICIENTS,
                                  dr_dCoeff.data() + (offset + 3 * i + 3) * TotalModel::NUM_SHAPE_COEFFICIENTS, 0);
                        // dr_dCoeff.block(offset + 3 * i, 0, 3, TotalModel::NUM_SHAPE_COEFFICIENTS).setZero();
                        continue;
                    }
                    const std::array<double, 3> AB{{
                        out_data[PAF_connection[i][2]][3 * PAF_connection[i][3] + 0] - out_data[PAF_connection[i][0]][3 * PAF_connection[i][1] + 0], 
                        out_data[PAF_connection[i][2]][3 * PAF_connection[i][3] + 1] - out_data[PAF_connection[i][0]][3 * PAF_connection[i][1] + 1], 
                        out_data[PAF_connection[i][2]][3 * PAF_connection[i][3] + 2] - out_data[PAF_connection[i][0]][3 * PAF_connection[i][1] + 2], 
                    }};
                    const auto length2 = AB[0] * AB[0] + AB[1] * AB[1] + AB[2] * AB[2];
                    const auto length = sqrt(length2);
                    const Eigen::Map< const Matrix<double, 3, 1> > AB_vec(AB.data());
                    const Eigen::Matrix<double, 3, 3, RowMajor> dudJ = Eigen::Matrix<double, 3, 3>::Identity() / length - AB_vec * AB_vec.transpose() / length2 / length;
                    dr_dCoeff.block(offset + 3 * i, 0, 3, TotalModel::NUM_SHAPE_COEFFICIENTS) = PAF_weight[i] * dudJ * 
                        ( dodc[PAF_connection[i][2]]->block(3 * PAF_connection[i][3], 0, 3, TotalModel::NUM_SHAPE_COEFFICIENTS) -
                        dodc[PAF_connection[i][0]]->block(3 * PAF_connection[i][1], 0, 3, TotalModel::NUM_SHAPE_COEFFICIENTS) );
                }
            }

            offset_inner = start_inner;
            if (fit_data_.inner_weight[0] > 0)
            {
                // the first defined inner constraints: should not bend (adam joint 6 should be close to the middle of central hip and neck)
                dr_dCoeff.block(offset_inner, 0, inner_dim[0], TotalModel::NUM_SHAPE_COEFFICIENTS) =
                    fit_data_.inner_weight[0] * (dTJdc.block(6 * 3, 0, 3, TotalModel::NUM_SHAPE_COEFFICIENTS) -
                    0.5 * (dTJdc.block(0 * 3, 0, 3, TotalModel::NUM_SHAPE_COEFFICIENTS) + dTJdc.block(12 * 3, 0, 3, TotalModel::NUM_SHAPE_COEFFICIENTS)));
            }
            else
                dr_dCoeff.block(offset_inner, 0, inner_dim[0], TotalModel::NUM_SHAPE_COEFFICIENTS).setZero();
        }

        if (fit_face_exp && jacobians[3])
        {
            auto* jac3 = jacobians[3];
            const auto jac3Cols = TotalModel::NUM_EXP_BASIS_COEFFICIENTS;
            Map< Matrix<double, Dynamic, Dynamic, RowMajor> > dr_dfc(jac3, m_nResiduals, jac3Cols);

            if (fit_data_.fit3D)
            {
                for (int i = 0; i < m_nCorrespond_adam2joints + m_nCorrespond_adam2pts; i++)
                {
                    if (targetPts[5 * i] == 0 && targetPts[5 * i + 1] == 0 && targetPts[5 * i + 2] == 0)
                    {
                        std::fill(jac3 + res_dim * i * TotalModel::NUM_EXP_BASIS_COEFFICIENTS,
                                  jac3 + (3 + res_dim * i) * TotalModel::NUM_EXP_BASIS_COEFFICIENTS, 0);
                    }
                    else dr_dfc.block<3, TotalModel::NUM_EXP_BASIS_COEFFICIENTS>(res_dim * i, 0) = m_targetPts_weight.block(5 * i, 0, 3, 1).asDiagonal() *
                        dOdfc.block<3, TotalModel::NUM_EXP_BASIS_COEFFICIENTS>(3 * i, 0);
                }
            }

            if (fit_data_.fit2D)
            {
                Eigen::Map< Matrix<double, Dynamic, 3, RowMajor> > jointArray(tempJointsPtr, m_nCorrespond_adam2joints + m_nCorrespond_adam2pts, 3);
                for (int i = 0; i < m_nCorrespond_adam2joints + m_nCorrespond_adam2pts; i++)
                {
                    if (targetPts[5 * i + 3] == 0 && targetPts[5 * i + 4] == 0)
                    {
                        std::fill(jac3 + (start_2d_dim + res_dim * i) * TotalModel::NUM_EXP_BASIS_COEFFICIENTS,
                                  jac3 + (2 + start_2d_dim + res_dim * i) * TotalModel::NUM_EXP_BASIS_COEFFICIENTS, 0);
                    }
                    else 
                    {
                        projection_Derivative(jac3, dOdfc.data(), jac3Cols, (double*)(jointArray.data() + 3 * i), fit_data_.K,
                                               res_dim * i + start_2d_dim, 3 * i, 1.);
                        dr_dfc.block(res_dim * i + start_2d_dim, 0, 2, TotalModel::NUM_EXP_BASIS_COEFFICIENTS) =
                            m_targetPts_weight.block(5 * i + 3, 0, 2, 1).asDiagonal() * dr_dfc.block(res_dim * i + start_2d_dim, 0, 2, TotalModel::NUM_EXP_BASIS_COEFFICIENTS);
                    }
                }
            }

            if (fit_data_.fitPAF)
            {
                const int offset = start_PAF;
                for (auto i = 0; i < num_PAF_constraint; i++)
                {
                    if (fit_data_.PAF.col(i).isZero(0))
                    {
                        std::fill(jac3 + (offset + 3 * i) * TotalModel::NUM_EXP_BASIS_COEFFICIENTS,
                                  jac3 + (offset + 3 * i + 3) * TotalModel::NUM_EXP_BASIS_COEFFICIENTS, 0);
                        // dr_dCoeff.block(offset + 3 * i, 0, 3, TotalModel::NUM_SHAPE_COEFFICIENTS).setZero();
                        continue;
                    }
                    if (out_data[PAF_connection[i][0]] == nullptr || out_data[PAF_connection[i][2]] == nullptr)
                        continue;
                    const auto& pafConnectionI = PAF_connection[i];
                    const std::array<double, 3> AB{{
                        out_data[pafConnectionI[2]][3 * pafConnectionI[3]    ] - out_data[pafConnectionI[0]][3 * pafConnectionI[1]],
                        out_data[pafConnectionI[2]][3 * pafConnectionI[3] + 1] - out_data[pafConnectionI[0]][3 * pafConnectionI[1] + 1],
                        out_data[pafConnectionI[2]][3 * pafConnectionI[3] + 2] - out_data[pafConnectionI[0]][3 * pafConnectionI[1] + 2],
                    }};
                    const auto length2 = AB[0] * AB[0] + AB[1] * AB[1] + AB[2] * AB[2];
                    const auto length = sqrt(length2);
                    const Eigen::Map< const Matrix<double, 3, 1> > AB_vec(AB.data());
                    const Eigen::Matrix<double, 3, 3, RowMajor> dudJ = Eigen::Matrix<double, 3, 3>::Identity() / length - AB_vec * AB_vec.transpose() / length2 / length;
                    dr_dfc.block<3, TotalModel::NUM_EXP_BASIS_COEFFICIENTS>(offset + 3 * i, 0) = PAF_weight[i] * dudJ *
                        ( dodfc[PAF_connection[i][2]]->block(3 * PAF_connection[i][3], 0, 3, TotalModel::NUM_EXP_BASIS_COEFFICIENTS) -
                        dodfc[PAF_connection[i][0]]->block(3 * PAF_connection[i][1], 0, 3, TotalModel::NUM_EXP_BASIS_COEFFICIENTS) );
                }
            }

            std::fill(jac3 + start_inner*jac3Cols,
                      jac3 + (start_inner+inner_dim[0])*jac3Cols, 0.0);
            // // Vectorized (slower) equivalent of above code
            // dr_dfc.block(start_inner, 0, inner_dim[0], jac3Cols).setZero();
        }
    }
// const auto duration_jacob = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now() - start_jacob).count();
// const auto duration_iter = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now() - start_iter).count();
// std::cout << "iter " << duration_iter * 1e-6 << "\n";
// std::cout << "FK " << duration_FK * 1e-6 << "\n";
// std::cout << "transJ " << duration_transJ * 1e-6 << "\n";
// std::cout << "LBS " << duration_LBS * 1e-6 << "\n";
// std::cout << "target " << duration_target * 1e-6 << "\n";
// std::cout << "Residual " << duration_res * 1e-6 << "\n";
// std::cout << "Jacobian " << duration_jacob * 1e-6 << "\n";
// std::cout << "------------------------------------------\n";
    return true;
}