in core/src/main/java/org/apache/sdap/mudrod/utils/SimilarityUtil.java [79:131]
/**
 * Builds a {@link LinkageTriple} for every ordered pair of entries in
 * {@code importRDD} whose keys differ.
 *
 * <p>The input is crossed with itself, so two distinct keys A and B produce two
 * triples, one for (A, B) and one for (B, A). Pairs whose keys are equal are
 * discarded before any weight is computed.
 *
 * @param importRDD key/vector pairs to compare against each other
 * @param simType   selects the measure: {@code SIM_PEARSON}, {@code SIM_HELLINGER}
 *                  or {@code SIM_COSINE}; any other value leaves every weight at 0.0
 * @return an RDD of triples holding both keys and the weight computed for them
 */
public static JavaRDD<LinkageTriple> calculateSimilarityFromVector(JavaPairRDD<String, Vector> importRDD, int simType) {
  // A JavaPairRDD is already an RDD of Tuple2 elements, so it can be crossed with
  // itself directly; no element-by-element copy into a JavaRDD is needed first.
  JavaPairRDD<Tuple2<String, Vector>, Tuple2<String, Vector>> pairs = importRDD.cartesian(importRDD);

  return pairs
      // Drop self-pairs up front instead of emitting null placeholders and
      // stripping them in a second pass.
      .filter(pair -> !pair._1._1.equals(pair._2._1))
      .map(pair -> {
        Vector vecA = pair._1._2;
        Vector vecB = pair._2._2;

        // Stays 0.0 when simType matches none of the known measures.
        double weight = 0.0;
        if (simType == SimilarityUtil.SIM_PEARSON) {
          weight = SimilarityUtil.pearsonDistance(vecA, vecB);
        } else if (simType == SimilarityUtil.SIM_HELLINGER) {
          weight = SimilarityUtil.hellingerDistance(vecA, vecB);
        } else if (simType == SimilarityUtil.SIM_COSINE) {
          weight = SimilarityUtil.cosineDistance(vecA, vecB);
        }

        LinkageTriple triple = new LinkageTriple();
        triple.keyA = pair._1._1;
        triple.keyB = pair._2._1;
        triple.weight = weight;
        return triple;
      });
}