public static void minimizeHopcroft()

in pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/automaton/MinimizationOperations.java [238:412]


  public static void minimizeHopcroft(Automaton a) {
    a.determinize();
    Set<Transition> tr = a._initial.getTransitionSet();
    if (tr.size() == 1) {
      Transition t = tr.iterator().next();
      if (t._to == a._initial && t._min == Character.MIN_VALUE && t._max == Character.MAX_VALUE) {
        return;
      }
    }
    a.totalize();
    // make arrays for numbered states and effective alphabet
    Set<State> ss = a.getStates();
    State[] states = new State[ss.size()];
    int number = 0;
    for (State q : ss) {
      states[number] = q;
      q._number = number++;
    }
    char[] sigma = a.getStartPoints();
    // initialize data structures
    ArrayList<ArrayList<LinkedList<State>>> reverse = new ArrayList<ArrayList<LinkedList<State>>>();
    for (int q = 0; q < states.length; q++) {
      ArrayList<LinkedList<State>> v = new ArrayList<LinkedList<State>>();
      initialize(v, sigma.length);
      reverse.add(v);
    }
    boolean[][] reverseNonempty = new boolean[states.length][sigma.length];
    ArrayList<LinkedList<State>> partition = new ArrayList<LinkedList<State>>();
    initialize(partition, states.length);
    int[] block = new int[states.length];
    StateList[][] active = new StateList[states.length][sigma.length];
    StateListNode[][] active2 = new StateListNode[states.length][sigma.length];
    LinkedList<IntPair> pending = new LinkedList<IntPair>();
    boolean[][] pending2 = new boolean[sigma.length][states.length];
    ArrayList<State> split = new ArrayList<State>();
    boolean[] split2 = new boolean[states.length];
    ArrayList<Integer> refine = new ArrayList<Integer>();
    boolean[] refine2 = new boolean[states.length];
    ArrayList<ArrayList<State>> splitblock = new ArrayList<ArrayList<State>>();
    initialize(splitblock, states.length);
    for (int q = 0; q < states.length; q++) {
      splitblock.set(q, new ArrayList<>());
      partition.set(q, new LinkedList<>());
      for (int x = 0; x < sigma.length; x++) {
        reverse.get(q).set(x, new LinkedList<>());
        active[q][x] = new StateList();
      }
    }
    // find initial partition and reverse edges
    for (int q = 0; q < states.length; q++) {
      State qq = states[q];
      int j;
      if (qq._accept) {
        j = 0;
      } else {
        j = 1;
      }
      partition.get(j).add(qq);
      block[qq._number] = j;
      for (int x = 0; x < sigma.length; x++) {
        char y = sigma[x];
        State p = qq.step(y);
        reverse.get(p._number).get(x).add(qq);
        reverseNonempty[p._number][x] = true;
      }
    }
    // initialize active sets
    for (int j = 0; j <= 1; j++) {
      for (int x = 0; x < sigma.length; x++) {
        for (State qq : partition.get(j)) {
          if (reverseNonempty[qq._number][x]) {
            active2[qq._number][x] = active[j][x].add(qq);
          }
        }
      }
    }
    // initialize pending
    for (int x = 0; x < sigma.length; x++) {
      int a0 = active[0][x]._size;
      int a1 = active[1][x]._size;
      int j;
      if (a0 <= a1) {
        j = 0;
      } else {
        j = 1;
      }
      pending.add(new IntPair(j, x));
      pending2[x][j] = true;
    }
    // process pending until fixed point
    int k = 2;
    while (!pending.isEmpty()) {
      IntPair ip = pending.removeFirst();
      int p = ip._first;
      int x = ip._second;
      pending2[x][p] = false;
      // find states that need to be split off their blocks
      for (StateListNode m = active[p][x]._first; m != null; m = m._next) {
        for (State s : reverse.get(m._q._number).get(x)) {
          if (!split2[s._number]) {
            split2[s._number] = true;
            split.add(s);
            int j = block[s._number];
            splitblock.get(j).add(s);
            if (!refine2[j]) {
              refine2[j] = true;
              refine.add(j);
            }
          }
        }
      }
      // refine blocks
      for (int j : refine) {
        if (splitblock.get(j).size() < partition.get(j).size()) {
          LinkedList<State> b1 = partition.get(j);
          LinkedList<State> b2 = partition.get(k);
          for (State s : splitblock.get(j)) {
            b1.remove(s);
            b2.add(s);
            block[s._number] = k;
            for (int c = 0; c < sigma.length; c++) {
              StateListNode sn = active2[s._number][c];
              if (sn != null && sn._stateList == active[j][c]) {
                sn.remove();
                active2[s._number][c] = active[k][c].add(s);
              }
            }
          }
          // update pending
          for (int c = 0; c < sigma.length; c++) {
            int aj = active[j][c]._size;
            int ak = active[k][c]._size;
            if (!pending2[c][j] && 0 < aj && aj <= ak) {
              pending2[c][j] = true;
              pending.add(new IntPair(j, c));
            } else {
              pending2[c][k] = true;
              pending.add(new IntPair(k, c));
            }
          }
          k++;
        }
        for (State s : splitblock.get(j)) {
          split2[s._number] = false;
        }
        refine2[j] = false;
        splitblock.get(j).clear();
      }
      split.clear();
      refine.clear();
    }
    // make a new state for each equivalence class, set initial state
    State[] newstates = new State[k];
    for (int n = 0; n < newstates.length; n++) {
      State s = new State();
      newstates[n] = s;
      for (State q : partition.get(n)) {
        if (q == a._initial) {
          a._initial = s;
        }
        s._accept = q._accept;
        s._number = q._number; // select representative
        q._number = n;
      }
    }
    // build transitions and set acceptance
    for (int n = 0; n < newstates.length; n++) {
      State s = newstates[n];
      s._accept = states[s._number]._accept;
      for (Transition t : states[s._number]._transitionSet) {
        s._transitionSet.add(new Transition(t._min, t._max, newstates[t._to._number]));
      }
    }
    a.removeDeadTransitions();
  }