tmk/cpp/algo/tmkfv.h (128 lines of code) (raw):
// ================================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// ================================================================
// ================================================================
// Wrapper class for TMK feature-vectors. It includes methods for
// computing them on a streaming basis from frame-features, one frame-feature
// at a time, as well as methods for manipulating them when loaded from disk.
// ================================================================
// ================================================================
// TMK NORMALIZED SCORING:
//
// The level-1 score is the cosine similarity between two pure-average
// features. It is not TMK. It is a criterion we use to determine whether or
// not to compute the full TMK pair score. (It has been found that when the
// level-1 scores differ significantly, the level-2 scores will as well, so the
// expensive level-2 score can be skipped. On the other hand, if the level-1
// scores are close, the level-2 scores might not be.)
//
// The TMK pair score (a.k.a. level-2 score) is as follows:
//
// For single videos:
// * Have P periods (nominally four of them).
// * Have m fourier coefficents a_0 through a_{m-1} (nominally m=32).
// * Note that the dot product is bilinear on vectors u and v so if u and v
// are both scaled by sqrt(a_j) then their dot is scaled by a_j.
// * For each period accumulate m cosine features and m sine features.
// * Suppose there are N frame-features ("frame hashes").
// * L2-normalize these.
// * 0th cosine feature is sqrt(a_0) sum_{over frames} (frame feature)
// * 0th sine feature is always zero
// * jth cosine feature is sqrt(a_j) sum_{over frames} (frame feature
// times cos(2 pi j t T))
// where T is the period and t is the integer timestamp of the frame
// * jth sine feature is sqrt(a_j) sum_{over frames} (frame feature
// times sin(2 pi j t T))
// where T is the period and t is the integer timestamp of the frame
// * Once frame-features are all ingested then L2-normalize all the
// cosine/sine features and then scale by sqrt(a_j).
//
// To score a pair of videos:
// * For each period T iterate over all offsets 0 .. T-1
// * For a given period and offset compute (see Poullot or Baraldi paper)
// o delta = 2 pi offset / period
// o Let u_{c,j} and u_{s,j} be the jth cosine/sine feature for the 1st video
// o Let v_{c,j} and v_{s,j} be the jth cosine/sine feature for the 1st video
// o Recall these have norm a_j: each vector is scaled by sqrt(a_j) so norms
// are a_j.
// o K_delta = u_{c,0} . v_{c,0}
// + sum_{j=1}^{m-1} cos(j delta) * (u_{c,j} . v_{c,j} + u_{s,j} . v_{s,j})
// + sum_{j=1}^{m-1} sin(j delta) * (u_{s,j} . v_{c,j} - u_{c,j} . v_{s,j})
// * For a given period, the TMK score is the maximum of K_delta for all
// offsets.
// * The TMK score for the pair of videos is either the max or sum over all
// periods.
//
// All well and good but now we need to normalize these to get a score between
// 0 and 1.
//
// To get that, suppose we are scoring a video with itself. This means each
// feature u_{.,j} == v_{.,j} and best offset is delta = 0.
//
// Then for each period,
//
// K_delta = u_{c,0} . u_{c,0}
// + sum_{j=1}^{m-1} cos(j delta) * (u_{c,j} . u_{c,j} + u_{s,j} . u_{s,j})
// + sum_{j=1}^{m-1} sin(j delta) * (u_{s,j} . u_{c,j} - u_{c,j} . u_{s,j})
//
// Since u_{.,j} . u_{.,j} = a_j (normalization) this is
//
// K_delta = a_0
// + sum_{j=1}^{m-1} cos(j delta) * 2 a_j
// + sum_{j=1}^{m-1} sin(j delta) * 0
//
// = a_0 + 2 sum_{j=1}^{m-1} cos(j delta)
//
// and since delta = 0 at best offset for self-scoring
//
// K_0 = a_0 + 2 sum_{j=1}^{m-1} a_j
//
// In the code below, this sum
//
// a_0 + 2 sum_{j=1}^{m-1} a_j
//
// is called _pairScoreNormalizer.
// ================================================================
#ifndef TMKFV_H
#define TMKFV_H
#include <stdio.h>
#include <tmk/cpp/io/tmkio.h>
#include <memory>
#include <vector>
namespace facebook {
namespace tmk {
namespace algo {
// ----------------------------------------------------------------
// TMK ingests frame-features or hashes. They are accumulated as time-weighted
// sums index by fourier period, then by fourier coefficients.
// These are parameters for TMK frame-feature processing:
using Periods = std::vector<int>;
// These are parameters for TMK frame-feature processing:
using FourierCoefficients = std::vector<float>;
// These are input to TMK frame-feature processing, also used for TMK
// internal state:
using FrameFeature = std::vector<float>;
// TMK internal state: The cosine/sine-feature dimensions are
//
// (numPeriods x numFourierCoefficients x frameFeatureDimension).
//
// If either numPeriods or numFourierCoefficients (or both) is zero then
// only the pure-average "level-1" feature will be computed.
using FeaturesByFourierCoefficient = std::vector<FrameFeature>;
using FeaturesByPeriodsAndFourierCoefficients =
std::vector<FeaturesByFourierCoefficient>;
// TMK alignment. Length is number of periods. These are used for detailed
// TMK-alignment values (when desired).
using BestOffsets = std::vector<int>;
using ValuesAtBestOffsets = std::vector<float>;
const int TMK_DEFAULT_RESAMPLE_FPS = 15;
// ================================================================
class TMKFeatureVectors {
// ----------------------------------------------------------------
private:
facebook::tmk::io::TMKFramewiseAlgorithm _algorithm;
int _framesPerSecond; // provenance of the data
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Input coefficients
Periods _periods;
FourierCoefficients _fourierCoefficients;
int _frameFeatureDimension;
int _frameFeatureCount;
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Output features
// This is guaranteed to be independent of any model-training parameters.
FrameFeature _pureAverageFeature;
// These ones may depend on model-training parameters.
// As noted above, the cosine/sine-feature dimensions are
//
// (numPeriods x numFourierCoefficients x frameFeatureDimension).
//
// This means:
// * Outer index is 0 to _periods.size() - 1 inclusive.
// * Middle index is 0 to _fourierCoefficients.size() - 1 inclusive.
// * Inner index is 0 to _frameFeatureDimension - 1 inclusive.
//
// This therefore means that cosine/sine-feature dimensions
// are (numPeriods x numFourierCoefficients x frameFeatureDimension).
FeaturesByPeriodsAndFourierCoefficients _cosFeatures;
FeaturesByPeriodsAndFourierCoefficients _sinFeatures;
// When we compute the TMK score of a hashed video with itself, we want 1.0.
// This number does that. The algebraic derivation is above.
float _pairScoreNormalizer;
// ----------------------------------------------------------------
public:
TMKFeatureVectors() {}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// For constructing before computing from frame features.
TMKFeatureVectors(
facebook::tmk::io::TMKFramewiseAlgorithm algorithm, // provenance
int framesPerSecond, // provenance
const Periods& periods,
const FourierCoefficients& fourierCoefficients,
int frameFeatureDimension);
// For constructing after computing from frame features, e.g. load from
// file. Since it is possible for the dimensions to be inconsistent, we make
// this private and provide access via a factory method.
private:
TMKFeatureVectors(
facebook::tmk::io::TMKFramewiseAlgorithm algorithm, // provenance
int framesPerSecond, // provenance
int frameFeatureCount, // informational
const Periods& periods,
const FourierCoefficients& fourierCoefficients,
const FrameFeature& pureAverageFeature,
const FeaturesByPeriodsAndFourierCoefficients& cosFeatures,
const FeaturesByPeriodsAndFourierCoefficients& sinFeatures);
public:
// See the above private constructor. This is used for reading precomputed
// results from storage.
static std::shared_ptr<TMKFeatureVectors> tryCreateFromPrecomputed(
facebook::tmk::io::TMKFramewiseAlgorithm algorithm,
int framesPerSecond,
int frameFeatureCount,
const Periods& periods,
const FourierCoefficients& fourierCoefficients,
const FrameFeature& pureAverageFeature,
const FeaturesByPeriodsAndFourierCoefficients& cosFeatures,
const FeaturesByPeriodsAndFourierCoefficients& sinFeatures);
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Checks the two are computed using the same periods, same framewise hasher,
// etc.
static bool areCompatible(
const TMKFeatureVectors& fva,
const TMKFeatureVectors& fvb);
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
static Periods makePoullotPeriods();
static FourierCoefficients makePoullotFourierCoefficients();
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
void ingestFrameFeature(const FrameFeature& frameFeature, int frameNumber);
void finishFrameFeatureIngest();
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
bool writeToOutputStream(FILE* fp, const char* programName) const;
bool writeToOutputFile(const char* fileName, const char* programName) const;
static std::shared_ptr<TMKFeatureVectors> readFromInputStream(
FILE* fp,
const char* programName);
static std::shared_ptr<TMKFeatureVectors> readFromInputFile(
const char* fileName,
const char* programName);
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
facebook::tmk::io::TMKFramewiseAlgorithm getAlgorithm() const {
return _algorithm;
}
int getNumPeriods() const {
return _periods.size();
}
int getNumFourierCoefficients() const {
return _fourierCoefficients.size();
}
int getFrameFeatureDimension() const {
return _frameFeatureDimension;
}
int getFramesPerSecond() const {
return _framesPerSecond;
}
int getFrameFeatureCount() const {
return _frameFeatureCount;
}
Periods getPeriods() const {
return _periods;
}
FourierCoefficients getFourierCoefficients() const {
return _fourierCoefficients;
}
FrameFeature getPureAverageFeature() const {
return _pureAverageFeature;
}
FeaturesByPeriodsAndFourierCoefficients getCosFeatures() const {
return _cosFeatures;
}
FeaturesByPeriodsAndFourierCoefficients getSinFeatures() const {
return _sinFeatures;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
void L2NormalizePureAverageFeature();
static void findPairOffsetsModuloPeriods(
const TMKFeatureVectors& fva,
const TMKFeatureVectors& fvb,
BestOffsets& bestOffsets,
ValuesAtBestOffsets& valuesAtBestOffsets,
bool printDetails);
static float computeLevel1Score(
const TMKFeatureVectors& fva,
const TMKFeatureVectors& fvb);
static float computeLevel2Score(
const TMKFeatureVectors& fva,
const TMKFeatureVectors& fvb);
static bool compare(
const TMKFeatureVectors& fva,
const TMKFeatureVectors& fvb,
float tolerance);
};
} // namespace algo
} // namespace tmk
} // namespace facebook
#endif // TMKFV_H