in pose-detection/src/blazepose_tfjs/detector.ts [149:284]
async estimatePoses(
image: PoseDetectorInput, estimationConfig: BlazePoseTfjsEstimationConfig,
timestamp?: number): Promise<Pose[]> {
const config = validateEstimationConfig(estimationConfig);
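// Without an input image there is nothing to estimate; clear any cached
// tracking state and return no poses.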
if (image == null) {
this.reset();
return [];
}
this.maxPoses = config.maxPoses;
// A user-provided timestamp overrides the video's timestamp.
if (timestamp != null) {
this.timestamp = timestamp * MILLISECOND_TO_MICRO_SECONDS;
} else {
// For static images, timestamp should be null.
this.timestamp =
isVideo(image) ? image.currentTime * SECOND_TO_MICRO_SECONDS : null;
}
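// Keep the original image size for mapping normalized keypoints back to
// pixel coordinates, and convert the input into a float32 image tensor.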
const imageSize = getImageSize(image);
const image3d = tf.tidy(() => tf.cast(toImageTensor(image), 'float32'));
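// Reuse the region of interest tracked from the previous frame when
// available; otherwise fall back to running the pose detector below.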
let poseRect = this.regionOfInterest;
if (poseRect == null) {
// Need to run detector again.
const detections = await this.detectPose(image3d);
if (detections.length === 0) {
this.reset();
image3d.dispose();
return [];
}
// Gets the very first detection from PoseDetection.
const firstDetection = detections[0];
// Calculates the region of interest based on the pose detection, so that it
// can be used to detect landmarks.
poseRect = this.poseDetectionToRoi(firstDetection, imageSize);
}
// Detects pose landmarks within specified region of interest of the image.
const poseLandmarksByRoiResult =
await this.poseLandmarksByRoi(poseRect, image3d);
image3d.dispose();
if (poseLandmarksByRoiResult == null) {
this.reset();
return [];
}
const {
landmarks: unfilteredPoseLandmarks,
auxiliaryLandmarks: unfilteredAuxiliaryLandmarks,
poseScore,
worldLandmarks: unfilteredWorldLandmarks,
segmentationMask: unfilteredSegmentationMask,
} = poseLandmarksByRoiResult;
// Smooths landmarks to reduce jitter.
const {
actualLandmarksFiltered: poseLandmarks,
auxiliaryLandmarksFiltered: auxiliaryLandmarks,
actualWorldLandmarksFiltered: poseWorldLandmarks
} =
this.poseLandmarkFiltering(
unfilteredPoseLandmarks, unfilteredAuxiliaryLandmarks,
unfilteredWorldLandmarks, imageSize);
// Calculates region of interest based on the auxiliary landmarks, to be
// used in the subsequent image.
const poseRectFromLandmarks =
this.poseLandmarksToRoi(auxiliaryLandmarks, imageSize);
// Cache the ROI for the next image.
this.regionOfInterest = poseRectFromLandmarks;
// Smooths the segmentation mask to reduce jitter.
const filteredSegmentationMask =
this.smoothSegmentation && unfilteredSegmentationMask != null ?
this.poseSegmentationFiltering(unfilteredSegmentationMask) :
unfilteredSegmentationMask;
// Scale back keypoints.
const keypoints = poseLandmarks != null ?
normalizedKeypointsToKeypoints(poseLandmarks, imageSize) :
null;
// Add keypoint name.
if (keypoints != null) {
keypoints.forEach((keypoint, i) => {
keypoint.name = BLAZEPOSE_KEYPOINTS[i];
});
}
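// World landmarks are not tied to the image plane, so unlike the 2D
// keypoints they are not rescaled to the image size.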
const keypoints3D = poseWorldLandmarks;
// Add keypoint name.
if (keypoints3D != null) {
keypoints3D.forEach((keypoint3D, i) => {
keypoint3D.name = BLAZEPOSE_KEYPOINTS[i];
});
}
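// Assemble the pose result; the segmentation mask, when present, is
// attached below.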
const pose: Pose = {score: poseScore, keypoints, keypoints3D};
if (filteredSegmentationMask !== null) {
// Converts the grayscale mask to RGBA.
const rgbaMask = tf.tidy(() => {
const mask3D =
// tslint:disable-next-line: no-unnecessary-type-assertion
tf.expandDims(filteredSegmentationMask, 2) as tf.Tensor3D;
// Pads a pixel [r] to [r, 0].
const rgMask = tf.pad(mask3D, [[0, 0], [0, 0], [0, 1]]);
// Pads a pixel [r, 0] to [r, 0, 0, r].
return tf.mirrorPad(rgMask, [[0, 0], [0, 0], [0, 2]], 'symmetric');
});
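// When smoothing is disabled the mask is not retained by the segmentation
// filter, so release it once the RGBA copy has been created.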
if (!this.smoothSegmentation) {
tf.dispose(filteredSegmentationMask);
}
const segmentation = {
maskValueToLabel,
mask: new BlazePoseTfjsMask(rgbaMask)
};
pose.segmentation = segmentation;
}
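// BlazePose is a single-person model, so at most one pose is returned.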
return [pose];
}