in pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/automaton/MinimizationOperations.java [238:412]
public static void minimizeHopcroft(Automaton a) {
a.determinize();
Set<Transition> tr = a._initial.getTransitionSet();
if (tr.size() == 1) {
Transition t = tr.iterator().next();
if (t._to == a._initial && t._min == Character.MIN_VALUE && t._max == Character.MAX_VALUE) {
return;
}
}
a.totalize();
// make arrays for numbered states and effective alphabet
Set<State> ss = a.getStates();
State[] states = new State[ss.size()];
int number = 0;
for (State q : ss) {
states[number] = q;
q._number = number++;
}
char[] sigma = a.getStartPoints();
// initialize data structures
ArrayList<ArrayList<LinkedList<State>>> reverse = new ArrayList<ArrayList<LinkedList<State>>>();
for (int q = 0; q < states.length; q++) {
ArrayList<LinkedList<State>> v = new ArrayList<LinkedList<State>>();
initialize(v, sigma.length);
reverse.add(v);
}
boolean[][] reverseNonempty = new boolean[states.length][sigma.length];
ArrayList<LinkedList<State>> partition = new ArrayList<LinkedList<State>>();
initialize(partition, states.length);
int[] block = new int[states.length];
StateList[][] active = new StateList[states.length][sigma.length];
StateListNode[][] active2 = new StateListNode[states.length][sigma.length];
LinkedList<IntPair> pending = new LinkedList<IntPair>();
boolean[][] pending2 = new boolean[sigma.length][states.length];
ArrayList<State> split = new ArrayList<State>();
boolean[] split2 = new boolean[states.length];
ArrayList<Integer> refine = new ArrayList<Integer>();
boolean[] refine2 = new boolean[states.length];
ArrayList<ArrayList<State>> splitblock = new ArrayList<ArrayList<State>>();
initialize(splitblock, states.length);
for (int q = 0; q < states.length; q++) {
splitblock.set(q, new ArrayList<>());
partition.set(q, new LinkedList<>());
for (int x = 0; x < sigma.length; x++) {
reverse.get(q).set(x, new LinkedList<>());
active[q][x] = new StateList();
}
}
// find initial partition and reverse edges
for (int q = 0; q < states.length; q++) {
State qq = states[q];
int j;
if (qq._accept) {
j = 0;
} else {
j = 1;
}
partition.get(j).add(qq);
block[qq._number] = j;
for (int x = 0; x < sigma.length; x++) {
char y = sigma[x];
State p = qq.step(y);
reverse.get(p._number).get(x).add(qq);
reverseNonempty[p._number][x] = true;
}
}
// initialize active sets
for (int j = 0; j <= 1; j++) {
for (int x = 0; x < sigma.length; x++) {
for (State qq : partition.get(j)) {
if (reverseNonempty[qq._number][x]) {
active2[qq._number][x] = active[j][x].add(qq);
}
}
}
}
// initialize pending
for (int x = 0; x < sigma.length; x++) {
int a0 = active[0][x]._size;
int a1 = active[1][x]._size;
int j;
if (a0 <= a1) {
j = 0;
} else {
j = 1;
}
pending.add(new IntPair(j, x));
pending2[x][j] = true;
}
// process pending until fixed point
int k = 2;
while (!pending.isEmpty()) {
IntPair ip = pending.removeFirst();
int p = ip._first;
int x = ip._second;
pending2[x][p] = false;
// find states that need to be split off their blocks
for (StateListNode m = active[p][x]._first; m != null; m = m._next) {
for (State s : reverse.get(m._q._number).get(x)) {
if (!split2[s._number]) {
split2[s._number] = true;
split.add(s);
int j = block[s._number];
splitblock.get(j).add(s);
if (!refine2[j]) {
refine2[j] = true;
refine.add(j);
}
}
}
}
// refine blocks
for (int j : refine) {
if (splitblock.get(j).size() < partition.get(j).size()) {
LinkedList<State> b1 = partition.get(j);
LinkedList<State> b2 = partition.get(k);
for (State s : splitblock.get(j)) {
b1.remove(s);
b2.add(s);
block[s._number] = k;
for (int c = 0; c < sigma.length; c++) {
StateListNode sn = active2[s._number][c];
if (sn != null && sn._stateList == active[j][c]) {
sn.remove();
active2[s._number][c] = active[k][c].add(s);
}
}
}
// update pending
for (int c = 0; c < sigma.length; c++) {
int aj = active[j][c]._size;
int ak = active[k][c]._size;
if (!pending2[c][j] && 0 < aj && aj <= ak) {
pending2[c][j] = true;
pending.add(new IntPair(j, c));
} else {
pending2[c][k] = true;
pending.add(new IntPair(k, c));
}
}
k++;
}
for (State s : splitblock.get(j)) {
split2[s._number] = false;
}
refine2[j] = false;
splitblock.get(j).clear();
}
split.clear();
refine.clear();
}
// make a new state for each equivalence class, set initial state
State[] newstates = new State[k];
for (int n = 0; n < newstates.length; n++) {
State s = new State();
newstates[n] = s;
for (State q : partition.get(n)) {
if (q == a._initial) {
a._initial = s;
}
s._accept = q._accept;
s._number = q._number; // select representative
q._number = n;
}
}
// build transitions and set acceptance
for (int n = 0; n < newstates.length; n++) {
State s = newstates[n];
s._accept = states[s._number]._accept;
for (Transition t : states[s._number]._transitionSet) {
s._transitionSet.add(new Transition(t._min, t._max, newstates[t._to._number]));
}
}
a.removeDeadTransitions();
}