package org.carrot2.text.preprocessing;

import com.carrotsearch.hppc.BitSet;
import com.carrotsearch.hppc.IntArrayList;
import com.google.common.collect.Lists;
import java.util.ArrayList;
import org.carrot2.core.attribute.Processing;
import org.carrot2.text.analysis.TokenTypeUtils;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.DefaultGroups;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Label;
import org.carrot2.util.attribute.Level;
import org.carrot2.util.attribute.constraint.IntRange;

/**
 * Computes, for every label listed in {@code allLabels.featureIndex}, the set of documents
 * the label is assigned to, and optionally drops labels that cover too few documents.
 *
 * <p>The result is written back into the {@link PreprocessingContext} passed to
 * {@link #assign(PreprocessingContext)}: {@code allLabels.documentIndices} always, and
 * {@code allLabels.featureIndex} when labels are filtered by {@link #minClusterSize}.
 */
@Bindable(prefix = "DocumentAssigner")
public class DocumentAssigner {

    /**
     * Controls how phrase labels are matched to documents. When {@code true}, the
     * phrase's own {@code tfByDocument} entry is used. When {@code false}, a document is
     * assigned if it appears in the {@code tfByDocument} entry of the stem of every
     * non-common word of the phrase.
     */
    @Level(AttributeLevel.MEDIUM)
    @Group(DefaultGroups.PREPROCESSING)
    @Processing
    @Input
    @Attribute
    @Label("Exact phrase assignment")
    public boolean exactPhraseAssignment = false;

    /**
     * Minimum number of assigned documents a label must have to be kept. Values of 1 or
     * less disable the filtering step altogether.
     */
    @Level(AttributeLevel.MEDIUM)
    @Input
    @Attribute
    @Group(DefaultGroups.PREPROCESSING)
    @Processing
    @IntRange(min = 1, max = 100)
    @Label("Minimum cluster size")
    public int minClusterSize = 2;

    /**
     * Builds one document bit set per label and stores the result in
     * {@code preprocessingContext.allLabels}.
     *
     * <p>A feature index smaller than the number of words denotes a single word; any
     * other value denotes the phrase at {@code featureIndex - wordCount}. If
     * {@link #minClusterSize} is greater than 1, labels with fewer assigned documents are
     * removed from both {@code featureIndex} and {@code documentIndices}, and
     * {@link LabelFilterProcessor#updateFirstPhraseIndex(PreprocessingContext)} is called
     * afterwards.
     *
     * @param preprocessingContext context providing the word, stem, phrase and label
     *        arrays; its {@code allLabels} fields are modified in place
     */
    public void assign(PreprocessingContext preprocessingContext) {
        final int[] labelsFeatureIndex = preprocessingContext.allLabels.featureIndex;
        final int[][] stemsTfByDocument = preprocessingContext.allStems.tfByDocument;
        final int[] wordsStemIndex = preprocessingContext.allWords.stemIndex;
        final short[] wordsType = preprocessingContext.allWords.type;
        final int[][] phrasesTfByDocument = preprocessingContext.allPhrases.tfByDocument;
        final int[][] phrasesWordIndices = preprocessingContext.allPhrases.wordIndices;
        final int wordCount = wordsStemIndex.length;
        final int documentCount = preprocessingContext.documents.size();
        final int labelCount = labelsFeatureIndex.length;

        final BitSet[] labelsDocumentIndices = new BitSet[labelCount];
        for (int label = 0; label < labelCount; label++) {
            final BitSet documentIndices = new BitSet(documentCount);
            final int featureIndex = labelsFeatureIndex[label];

            if (featureIndex < wordCount) {
                // Single-word label: use the documents of the word's stem.
                addTfByDocumentToBitSet(documentIndices, stemsTfByDocument[wordsStemIndex[featureIndex]]);
            } else {
                // Phrase label: phrase indices follow word indices in the feature space.
                final int phraseIndex = featureIndex - wordCount;
                if (this.exactPhraseAssignment) {
                    addTfByDocumentToBitSet(documentIndices, phrasesTfByDocument[phraseIndex]);
                } else {
                    // Intersect the document sets of all non-common words of the phrase.
                    // The first such word seeds the set, later ones narrow it down. A
                    // phrase made only of common words ends up with no documents.
                    boolean seeded = false;
                    for (int wordIndex : phrasesWordIndices[phraseIndex]) {
                        if (TokenTypeUtils.isCommon(wordsType[wordIndex])) {
                            continue;
                        }
                        final int[] stemTfByDocument = stemsTfByDocument[wordsStemIndex[wordIndex]];
                        if (seeded) {
                            final BitSet wordDocuments = new BitSet(documentCount);
                            addTfByDocumentToBitSet(wordDocuments, stemTfByDocument);
                            documentIndices.and(wordDocuments);
                        } else {
                            addTfByDocumentToBitSet(documentIndices, stemTfByDocument);
                            seeded = true;
                        }
                    }
                }
            }
            labelsDocumentIndices[label] = documentIndices;
        }

        // No size filtering requested: publish the assignments for all labels as they are.
        if (this.minClusterSize <= 1) {
            preprocessingContext.allLabels.documentIndices = labelsDocumentIndices;
            return;
        }

        // Keep only the labels that cover at least minClusterSize documents. The feature
        // indices and the document sets are filtered together so they stay aligned.
        final IntArrayList keptFeatureIndices = new IntArrayList(labelCount);
        final ArrayList<BitSet> keptDocumentIndices = Lists.newArrayListWithExpectedSize(labelCount);
        for (int label = 0; label < labelCount; label++) {
            final BitSet documentIndices = labelsDocumentIndices[label];
            if (documentIndices.cardinality() >= this.minClusterSize) {
                keptFeatureIndices.add(labelsFeatureIndex[label]);
                keptDocumentIndices.add(documentIndices);
            }
        }
        preprocessingContext.allLabels.documentIndices =
                keptDocumentIndices.toArray(new BitSet[keptDocumentIndices.size()]);
        preprocessingContext.allLabels.featureIndex = keptFeatureIndices.toArray();
        LabelFilterProcessor.updateFirstPhraseIndex(preprocessingContext);
    }

    /**
     * Sets in {@code bitSet} the bit for every value stored at an even position of
     * {@code iArr}. The array is read as consecutive pairs whose first element is the bit
     * to set (presumably a document index followed by its term frequency -- the second
     * element is not used here). A trailing unpaired element is ignored.
     *
     * @param bitSet bit set to add to
     * @param iArr   paired array whose even positions hold the bits to set
     */
    private static void addTfByDocumentToBitSet(BitSet bitSet, int[] iArr) {
        // Step over whole pairs only; "pos + 1 < length" skips a dangling last element.
        for (int pos = 0; pos + 1 < iArr.length; pos += 2) {
            bitSet.set(iArr[pos]);
        }
    }
}
