public static int getOptimalStringAlignmentDistance()

in core/src/main/java/com/datastax/dse/driver/internal/core/graph/SearchUtils.java [48:142]


  public static int getOptimalStringAlignmentDistance(String s, String t) {

    /*
     * Code adapted from https://github.com/apache/commons-lang/blob/LANG_2_6/src/main/java/org/apache/commons/lang/StringUtils.java
     * which was originally released under the Apache 2.0 license with the following copyright:
     *
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *      http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */
    if (s == null || t == null) {
      throw new IllegalArgumentException("Strings must not be null");
    }

    int n = s.length(); // length of s
    int m = t.length(); // length of t

    if (n == 0) {
      return m;
    } else if (m == 0) {
      return n;
    }

    if (n > m) {
      // swap the input strings to consume less memory
      String tmp = s;
      s = t;
      t = tmp;
      n = m;
      m = t.length();
    }

    // instead of maintaining the full matrix in memory,
    // we use a sliding window containing 3 lines:
    // the current line being written to, and
    // the two previous ones.

    int d[] = new int[n + 1]; // current line in the cost matrix
    int p1[] = new int[n + 1]; // first line above the current one in the cost matrix
    int p2[] = new int[n + 1]; // second line above the current one in the cost matrix
    int _d[]; // placeholder to assist in swapping p1, p2 and d

    // indexes into strings s and t
    int i; // iterates through s
    int j; // iterates through t

    for (i = 0; i <= n; i++) {
      p1[i] = i;
    }

    for (j = 1; j <= m; j++) {

      // jth character of t
      char t_j = t.charAt(j - 1);
      d[0] = j;

      for (i = 1; i <= n; i++) {

        char s_i = s.charAt(i - 1);
        int cost = s_i == t_j ? 0 : 1;

        int deletion = d[i - 1] + 1; // cell to the left + 1
        int insertion = p1[i] + 1; // cell to the top + 1
        int substitution = p1[i - 1] + cost; // cell diagonally left and up + cost

        d[i] = Math.min(Math.min(deletion, insertion), substitution);

        // transposition
        if (i > 1 && j > 1 && s_i == t.charAt(j - 2) && s.charAt(i - 2) == t_j) {
          d[i] = Math.min(d[i], p2[i - 2] + cost);
        }
      }

      // swap arrays
      _d = p2;
      p2 = p1;
      p1 = d;
      d = _d;
    }

    // our last action in the above loop was to switch d and p1, so p1 now
    // actually has the most recent cost counts
    return p1[n];
  }