public static JavaRDD calculateSimilarityFromVector()

in core/src/main/java/org/apache/sdap/mudrod/utils/SimilarityUtil.java [79:131]


  /**
   * Builds a weighted {@link LinkageTriple} for every ordered pair of entries in
   * {@code importRDD} whose keys differ.
   *
   * <p>The input is crossed with itself, so for two different keys {@code a} and {@code b}
   * both {@code (a, b)} and {@code (b, a)} are produced. Pairs whose keys are equal are
   * discarded and never reach the weight computation.
   *
   * @param importRDD keyed vectors to compare against one another
   * @param simType   selects the weight function: {@code SimilarityUtil.SIM_PEARSON},
   *                  {@code SimilarityUtil.SIM_HELLINGER} or {@code SimilarityUtil.SIM_COSINE};
   *                  any other value leaves the weight at {@code 0.0}
   * @return one triple per retained pair, holding both keys and the computed weight
   */
  public static JavaRDD<LinkageTriple> calculateSimilarityFromVector(JavaPairRDD<String, Vector> importRDD, int simType) {
    // Re-wrap the pair RDD as a plain RDD of tuples so it can be crossed with itself.
    JavaRDD<Tuple2<String, Vector>> entries = importRDD.map(entry -> new Tuple2<String, Vector>(entry._1(), entry._2()));
    JavaPairRDD<Tuple2<String, Vector>, Tuple2<String, Vector>> allPairs = entries.cartesian(entries);

    // Drop same-key pairs up front; the mapping step then always yields a real triple.
    JavaPairRDD<Tuple2<String, Vector>, Tuple2<String, Vector>> distinctKeyPairs =
        allPairs.filter(pair -> !pair._1()._1().equals(pair._2()._1()));

    return distinctKeyPairs.map(pair -> {
      Tuple2<String, Vector> left = pair._1();
      Tuple2<String, Vector> right = pair._2();

      // The first matching similarity type wins; an unrecognized type falls back to 0.0.
      Double weight;
      if (simType == SimilarityUtil.SIM_PEARSON) {
        weight = SimilarityUtil.pearsonDistance(left._2(), right._2());
      } else if (simType == SimilarityUtil.SIM_HELLINGER) {
        weight = SimilarityUtil.hellingerDistance(left._2(), right._2());
      } else if (simType == SimilarityUtil.SIM_COSINE) {
        weight = SimilarityUtil.cosineDistance(left._2(), right._2());
      } else {
        weight = 0.0;
      }

      LinkageTriple linkage = new LinkageTriple();
      linkage.keyA = left._1();
      linkage.keyB = right._1();
      linkage.weight = weight;
      return linkage;
    });
  }