opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/StringDistanceMeasurer.java [104:164]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      int l1 = str1List.size(), l2 = str2List.size();
      if (l1 < 2)
        l1 = str1Words.length;
      if (l2 < 2)
        l2 = str2Words.length;

      int lOverlap = 0;
      List<String> strListOverlap = new ArrayList<>(str1List);
      strListOverlap.retainAll(str2List);
      for (String w : strListOverlap) {
        if (w.toLowerCase().equals(w)) // no special interest word
          lOverlap++;
        else
          lOverlap += 2; // if capitalized, or specific word => important so
                         // double score
      }
      result = Math.pow((double) (lOverlap * lOverlap) / (double) l1
          / (double) l2, 0.4);

      // Next, look for near-matching word pairs among the remaining words that
      // are either long or contain an upper-case character
      int countSimilar = 0;
      str1List.removeAll(strListOverlap);
      str2List.removeAll(strListOverlap);
      for (String w1 : str1List) {
        for (String w2 : str2List) {
          if (w1.length() > MIN_STRING_LENGTH_FOR_DISTORTED_WORD
              || !w1.toLowerCase().equals(w1))
            if (w2.length() > MIN_STRING_LENGTH_FOR_DISTORTED_WORD
                || !w2.toLowerCase().equals(w2))
              if (LevensteinDistanceFinder.levensteinDistance(w1, w2, 1, 10, 1,
                  10) <= ACCEPTABLE_DEVIATION_IN_CHAR)
                countSimilar++;
        }
      }
      lOverlap += countSimilar;
      result = Math.pow((double) (lOverlap * lOverlap) / (double) l1
          / (double) l2, 0.4);
      if (result > 1)
        result = 1.0;

      // double ld = LevensteinDistanceFinder. levensteinDistance(str1, str2, 1,
      // 10, 1, 10);
      // System.out.println(ld);

    } catch (Exception e) {
      e.printStackTrace();
      return -1.0;
    }

    Double linguisticScore = -1.0;
    // TODO: not implemented yet - intended to employ a linguistic processor
    // (see the commented-out code below)
    /*
     * if (result>MIN_SCORE_FOR_LING) { List<List<ParseTreeChunk>> matchResult =
     * pos.matchOrigSentencesCache(str1, str2); linguisticScore =
     * ParseTreeChunkListScorer.getParseTreeChunkListScore(matchResult);
     * System.out.println(matchResult);
     * 
     * // magic formula for 0.7 string match and 0.3 linguistic match result =
     * result*0.7 + linguisticScore/6.0* 0.3; }
     */
    return result;
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/StringDistanceMeasurer.java [180:240]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      int l1 = str1List.size(), l2 = str2List.size();
      if (l1 < 2)
        l1 = str1Words.length;
      if (l2 < 2)
        l2 = str2Words.length;

      int lOverlap = 0;
      List<String> strListOverlap = new ArrayList<>(str1List);
      strListOverlap.retainAll(str2List);
      for (String w : strListOverlap) {
        if (w.toLowerCase().equals(w)) // no special interest word
          lOverlap++;
        else
          lOverlap += 2; // if capitalized, or specific word => important so
                         // double score
      }
      result = Math.pow((double) (lOverlap * lOverlap) / (double) l1
          / (double) l2, 0.4);

      // Next, look for near-matching word pairs among the remaining words that
      // are either long or contain an upper-case character
      int countSimilar = 0;
      str1List.removeAll(strListOverlap);
      str2List.removeAll(strListOverlap);
      for (String w1 : str1List) {
        for (String w2 : str2List) {
          if (w1.length() > MIN_STRING_LENGTH_FOR_DISTORTED_WORD
              || !w1.toLowerCase().equals(w1))
            if (w2.length() > MIN_STRING_LENGTH_FOR_DISTORTED_WORD
                || !w2.toLowerCase().equals(w2))
              if (LevensteinDistanceFinder.levensteinDistance(w1, w2, 1, 10, 1,
                  10) <= ACCEPTABLE_DEVIATION_IN_CHAR)
                countSimilar++;
        }
      }
      lOverlap += countSimilar;
      result = Math.pow((double) (lOverlap * lOverlap) / (double) l1
          / (double) l2, 0.4);
      if (result > 1)
        result = 1.0;

      // double ld = LevensteinDistanceFinder. levensteinDistance(str1, str2, 1,
      // 10, 1, 10);
      // System.out.println(ld);

    } catch (Exception e) {
      e.printStackTrace();
      return -1.0;
    }

    Double linguisticScore = -1.0;
    // TODO: not implemented yet - intended to employ a linguistic processor
    // (see the commented-out code below)
    /*
     * if (result>MIN_SCORE_FOR_LING) { List<List<ParseTreeChunk>> matchResult =
     * pos.matchOrigSentencesCache(str1, str2); linguisticScore =
     * ParseTreeChunkListScorer.getParseTreeChunkListScore(matchResult);
     * System.out.println(matchResult);
     * 
     * // magic formula for 0.7 string match and 0.3 linguistic match result =
     * result*0.7 + linguisticScore/6.0* 0.3; }
     */
    return result;
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



