package org.carrot2.text.preprocessing;

import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntIntAssociativeContainer;
import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.carrot2.core.attribute.Processing;
import org.carrot2.shaded.guava.common.collect.Lists;
import org.carrot2.util.IntMapUtils;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.DefaultGroups;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Label;
import org.carrot2.util.attribute.Level;
import org.carrot2.util.attribute.constraint.IntRange;

/**
 * Extracts frequently repeated token sequences ("phrases") from the suffix-sorted token
 * stream of a {@link PreprocessingContext} and stores them in
 * {@code preprocessingContext.allPhrases}.
 *
 * <p>Processing happens in two steps: {@link #discoverRcs(int[], int[], int[])} walks the
 * suffix array / LCP array pair and collects repeated substrings, then
 * {@link #extractPhrases(PreprocessingContext)} merges substrings that
 * {@code Substring.isEquivalentTo} reports as equivalent into single phrases.
 */
@Bindable(prefix = "PhraseExtractor")
public class PhraseExtractor {
    /** Shortest substring, in tokens, that is pushed on the discovery stack. */
    private static final int MIN_PHRASE_LENGTH = 2;

    /** Longest substring, in tokens, considered; longer common prefixes are capped to this. */
    static final int MAX_PHRASE_LENGTH = 8;

    /**
     * Phrase document frequency threshold: a discovered substring is reported only when the
     * number of distinct documents recorded for it is at least this value.
     */
    @Level(AttributeLevel.ADVANCED)
    @Input
    @Attribute
    @Group(DefaultGroups.PHRASE_EXTRACTION)
    @Processing
    @IntRange(min = 1, max = 100)
    @Label("Phrase document frequency threshold")
    public int dfThreshold = 1;

    /** Fills in the suffix order and LCP arrays of the context before phrases are discovered. */
    private SuffixSorter suffixSorter = new SuffixSorter();

    /**
     * Discovers phrases in the given context and writes the result to
     * {@code allPhrases.wordIndices}, {@code allPhrases.tf} and
     * {@code allPhrases.tfByDocument}. All three arrays are always assigned; they are empty
     * when no repeated substring passes the {@link #dfThreshold} filter.
     *
     * @param preprocessingContext context providing {@code allTokens} and {@code allWords};
     *     its {@code allPhrases} fields are overwritten
     */
    public void extractPhrases(PreprocessingContext preprocessingContext) {
        this.suffixSorter.suffixSort(preprocessingContext);

        final int[] suffixArray = preprocessingContext.allTokens.suffixOrder;
        final int[] lcpArray = preprocessingContext.allTokens.lcp;
        final int[] tokensWordIndex = preprocessingContext.allTokens.wordIndex;
        final int[] documentIndexArray = preprocessingContext.allTokens.documentIndex;
        final int[] wordsStemIndex = preprocessingContext.allWords.stemIndex;

        final List<Substring> rcs = discoverRcs(suffixArray, lcpArray, documentIndexArray);

        final List<int[]> phraseWordIndices = Lists.newArrayList();
        final IntArrayList phraseTf = new IntArrayList();
        final List<int[]> phraseTfByDocument = Lists.newArrayList();

        if (!rcs.isEmpty()) {
            // After sorting, substrings that are equivalent to each other are adjacent, so a
            // single pass can fold each run of equivalents into one phrase.
            Collections.sort(rcs, new SubstringComparator(tokensWordIndex, wordsStemIndex));

            // The most frequent member of the current run supplies the phrase's word indices.
            Substring mostFrequent = rcs.get(0);
            int totalTf = mostFrequent.frequency;
            final IntIntHashMap tfByDocument = new IntIntHashMap();
            tfByDocument.putAll((IntIntAssociativeContainer) mostFrequent.tfByDocument);

            for (int k = 0; k < rcs.size() - 1; k++) {
                final Substring current = rcs.get(k);
                final Substring next = rcs.get(k + 1);

                if (current.isEquivalentTo(next, tokensWordIndex, wordsStemIndex)) {
                    // Same phrase: accumulate counts. No document index equals -1 here, so
                    // the offset correction in addAllWithOffset is effectively disabled.
                    totalTf += next.frequency;
                    addAllWithOffset(tfByDocument, next.tfByDocument, -1);
                    if (mostFrequent.frequency < next.frequency) {
                        mostFrequent = next;
                    }
                } else {
                    // Run ended: emit the accumulated phrase and start a new run at "next".
                    emitPhrase(mostFrequent, totalTf, tfByDocument, tokensWordIndex,
                            phraseWordIndices, phraseTf, phraseTfByDocument);
                    mostFrequent = next;
                    totalTf = next.frequency;
                    tfByDocument.clear();
                    tfByDocument.putAll((IntIntAssociativeContainer) next.tfByDocument);
                }
            }

            // Emit the run that was still open when the list ended. The representative's own
            // span is used for both offset and length, exactly as for runs closed in the loop.
            emitPhrase(mostFrequent, totalTf, tfByDocument, tokensWordIndex,
                    phraseWordIndices, phraseTf, phraseTfByDocument);
        }

        preprocessingContext.allPhrases.wordIndices =
                phraseWordIndices.toArray(new int[phraseWordIndices.size()][]);
        preprocessingContext.allPhrases.tf = phraseTf.toArray();
        preprocessingContext.allPhrases.tfByDocument =
                phraseTfByDocument.toArray(new int[phraseTfByDocument.size()][]);
    }

    /**
     * Appends one finished phrase to the three parallel output collections.
     *
     * @param representative substring whose token span provides the phrase's word indices
     * @param totalTf accumulated frequency of all substrings merged into the phrase
     * @param tfByDocument accumulated document index to count map; flattened via
     *     {@code IntMapUtils.flatten}, so later changes to the map do not affect the output
     */
    private static void emitPhrase(Substring representative, int totalTf,
            IntIntHashMap tfByDocument, int[] tokensWordIndex, List<int[]> phraseWordIndices,
            IntArrayList phraseTf, List<int[]> phraseTfByDocument) {
        final int[] wordIndices = new int[representative.to - representative.from];
        System.arraycopy(tokensWordIndex, representative.from, wordIndices, 0, wordIndices.length);
        phraseWordIndices.add(wordIndices);
        phraseTf.add(totalTf);
        phraseTfByDocument.add(IntMapUtils.flatten(tfByDocument));
    }

    /**
     * Scans positions {@code 1 .. lcpArray.length - 2} of the suffix array and collects
     * repeated substrings of {@link #MIN_PHRASE_LENGTH} to {@link #MAX_PHRASE_LENGTH} tokens,
     * keeping those seen in at least {@link #dfThreshold} distinct documents.
     *
     * <p>A stack holds the substrings currently "open". Depending on how the (capped) LCP at
     * the current position compares to the length of the topmost substring, the scan either
     * pushes longer substrings, increments the topmost one, or pops substrings and folds
     * their counts into the entry below. Entries still on the stack when the scan ends are
     * not reported.
     *
     * @param suffixArray suffix order of the token stream
     * @param lcpArray LCP values aligned with {@code suffixArray}
     * @param documentIndexArray document index for each token position
     * @return discovered substrings, in the order they were popped off the stack
     */
    private List<Substring> discoverRcs(int[] suffixArray, int[] lcpArray, int[] documentIndexArray) {
        final Substring[] stack = new Substring[lcpArray.length];
        int sp = -1;
        int i = 1;

        final List<Substring> result = Lists.newArrayList();
        while (i < lcpArray.length - 1) {
            final int suffixIndex = suffixArray[i];
            final int currentDocument = documentIndexArray[suffixIndex];
            final int currentLcp = Math.min(MAX_PHRASE_LENGTH, lcpArray[i]);

            if (sp < 0) {
                // Empty stack: open every length from the minimum up to the current LCP.
                if (currentLcp >= MIN_PHRASE_LENGTH) {
                    final int previousDocument = documentIndexArray[suffixArray[i - 1]];
                    sp = pushPhrases(stack, sp, MIN_PHRASE_LENGTH, currentLcp, i, suffixIndex,
                            previousDocument, currentDocument);
                }
                i++;
                continue;
            }

            final Substring top = stack[sp];
            final int topLength = top.to - top.from;

            if (topLength < currentLcp) {
                // Longer common prefix: the old top becomes an inner entry, and every length
                // between it and the current LCP is opened above it.
                final int previousDocument = documentIndexArray[suffixArray[i - 1]];
                top.documentIndexToOffset = previousDocument;
                sp = pushPhrases(stack, sp, topLength + 1, currentLcp, i, suffixIndex,
                        previousDocument, currentDocument);
                i++;
            } else if (topLength == currentLcp) {
                // One more occurrence of the topmost substring.
                top.frequency++;
                top.tfByDocument.putOrAdd(currentDocument, 1, 1);
                i++;
            } else {
                // Shorter common prefix: close substrings longer than the LCP. Position i is
                // deliberately not advanced; it is re-examined against the reduced stack.
                do {
                    final Substring popped = stack[sp];
                    if (popped.tfByDocument.size() >= this.dfThreshold) {
                        result.add(popped);
                    }
                    sp--;
                    if (sp >= 0) {
                        // The popped entry's first occurrence is already counted in the
                        // entry below it, hence the "- 1" and the per-document offset.
                        stack[sp].frequency += popped.frequency - 1;
                        addAllWithOffset(stack[sp].tfByDocument, popped.tfByDocument,
                                stack[sp].documentIndexToOffset);
                    }
                    // The sp >= 0 guard is essential: without it, popping the last entry
                    // would index stack[-1].
                } while (sp >= 0 && stack[sp].to - stack[sp].from > currentLcp);
            }
        }
        return result;
    }

    /**
     * Pushes one stack entry for each length in
     * {@code max(firstLength, MIN_PHRASE_LENGTH) .. lastLength}, all starting at token
     * position {@code from}.
     *
     * <p>The entry of length {@code lastLength} ends up on top and starts with frequency 2
     * and counts for both the previous and the current document. Shorter entries start with
     * frequency 1, a count for the previous document only, and remember that document in
     * {@code documentIndexToOffset}.
     *
     * @return the new stack pointer
     */
    private static int pushPhrases(Substring[] stack, int sp, int firstLength, int lastLength,
            int id, int from, int previousDocument, int currentDocument) {
        for (int length = Math.max(firstLength, MIN_PHRASE_LENGTH); length <= lastLength; length++) {
            final boolean topmost = length == lastLength;
            final Substring entry = new Substring(id, from, from + length, topmost ? 2 : 1);
            entry.tfByDocument = new IntIntHashMap();
            entry.tfByDocument.put(previousDocument, 1);
            if (topmost) {
                entry.tfByDocument.putOrAdd(currentDocument, 1, 1);
            } else {
                entry.documentIndexToOffset = previousDocument;
            }
            sp++;
            stack[sp] = entry;
        }
        return sp;
    }

    /**
     * Adds every count in {@code source} to {@code target}, subtracting one from the count
     * whose key equals {@code documentIndexToOffset}. Passing a value that never occurs as a
     * key (the caller uses {@code -1}) makes this a plain element-wise sum.
     *
     * @param target map that receives the counts; modified in place
     * @param source map whose counts are added; not modified
     * @param documentIndexToOffset key whose count is reduced by one before being added
     */
    private static void addAllWithOffset(IntIntHashMap target, IntIntHashMap source,
            int documentIndexToOffset) {
        for (IntIntCursor cursor : source) {
            final int adjusted =
                    (cursor.key == documentIndexToOffset) ? cursor.value - 1 : cursor.value;
            target.putOrAdd(cursor.key, adjusted, adjusted);
        }
    }
}
