package org.carrot2.text.vsm;

import com.carrotsearch.hppc.BitSet;
import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.sorting.IndirectComparator;
import com.carrotsearch.hppc.sorting.IndirectSort;
import com.drew.metadata.exif.ExifDirectoryBase;
import org.apache.calcite.adapter.enumerable.EnumerableConvention;
import org.carrot2.core.attribute.Internal;
import org.carrot2.core.attribute.Processing;
import org.carrot2.mahout.math.matrix.DoubleMatrix2D;
import org.carrot2.mahout.math.matrix.impl.DenseDoubleMatrix2D;
import org.carrot2.mahout.math.matrix.impl.SparseDoubleMatrix2D;
import org.carrot2.matrix.MatrixUtils;
import org.carrot2.text.analysis.TokenTypeUtils;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.DefaultGroups;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Level;
import org.carrot2.util.attribute.Required;
import org.carrot2.util.attribute.constraint.DoubleRange;
import org.carrot2.util.attribute.constraint.ImplementingClasses;
import org.carrot2.util.attribute.constraint.IntRange;

/**
 * Builds the term-document and term-phrase matrices stored in a
 * {@link VectorSpaceModelContext}, based on the stems, words, phrases and labels found in
 * its {@link PreprocessingContext}.
 *
 * <p>Rows of the term-document matrix correspond to stems and columns to documents. Only
 * stems referenced by the labels (directly, or through non-common words of label phrases)
 * are considered, and of those only the highest-weighted ones that fit within
 * {@link #maximumMatrixSize} cells are kept.
 */
@Bindable(prefix = "TermDocumentMatrixBuilder")
/* loaded from: input_file:libs/carrot2-mini-3.15.0.jar:org/carrot2/text/vsm/TermDocumentMatrixBuilder.class */
public class TermDocumentMatrixBuilder {
    public static final String MATRIX_MODEL = "Matrix model";

    /** Name of the document field whose stems receive {@link #titleWordsBoost}. */
    private static final String TITLE_FIELD_NAME = "title";

    /** Value returned by the title lookup when no field is named {@value #TITLE_FIELD_NAME}. */
    private static final int NO_TITLE_FIELD = -1;

    /**
     * Multiplier applied to the weight of every stem whose field-index bit set contains the
     * title field. Stems that do not occur in the title field keep a multiplier of 1.
     */
    @Level(AttributeLevel.MEDIUM)
    @Group(DefaultGroups.LABELS)
    @Processing
    @Input
    @Attribute
    @DoubleRange(min = 0.0d, max = 10.0d)
    public double titleWordsBoost = 2.0d;

    /**
     * Upper bound on the number of cells (rows times columns) of the term-document matrix.
     * The number of rows is limited to {@code maximumMatrixSize / documentCount}.
     */
    @Level(AttributeLevel.ADVANCED)
    @Input
    @Attribute
    @Group(MATRIX_MODEL)
    @Processing
    @IntRange(min = 5000)
    @Internal(configuration = true)
    public int maximumMatrixSize = 37500;

    /**
     * Maximum fraction of documents (0..1) a stem may occur in to be included in the
     * term-document matrix. Stems with a higher document frequency ratio are skipped.
     */
    @Level(AttributeLevel.ADVANCED)
    @Group(MATRIX_MODEL)
    @Processing
    @Input
    @Attribute
    @DoubleRange(min = 0.0d, max = 1.0d)
    public double maxWordDf = 0.9d;

    /** Strategy used to turn term frequency and document frequency into a matrix weight. */
    @Level(AttributeLevel.ADVANCED)
    @Input
    @Attribute
    @ImplementingClasses(classes = {LogTfIdfTermWeighting.class, LinearTfIdfTermWeighting.class, TfTermWeighting.class}, strict = false)
    @Group(MATRIX_MODEL)
    @Processing
    @Required
    public ITermWeighting termWeighting = new LogTfIdfTermWeighting();

    /**
     * Builds the term-document matrix and the stem-to-row mapping and stores them in
     * {@code vectorSpaceModelContext.termDocumentMatrix} and
     * {@code vectorSpaceModelContext.stemToRowIndex}.
     *
     * <p>When the preprocessing context holds no documents, an empty 0x0 matrix and an empty
     * mapping are stored.
     *
     * @param vectorSpaceModelContext context providing the preprocessing data and receiving
     *     the results
     */
    public void buildTermDocumentMatrix(VectorSpaceModelContext vectorSpaceModelContext) {
        final PreprocessingContext preprocessingContext = vectorSpaceModelContext.preprocessingContext;
        final int documentCount = preprocessingContext.documents.size();
        if (documentCount == 0) {
            vectorSpaceModelContext.termDocumentMatrix = new DenseDoubleMatrix2D(0, 0);
            vectorSpaceModelContext.stemToRowIndex = new IntIntHashMap();
            return;
        }

        final int[] stemsTf = preprocessingContext.allStems.tf;
        // Each row is read as consecutive (document index, term frequency) pairs, hence
        // "length / 2" is the number of documents the stem occurs in.
        final int[][] stemsTfByDocument = preprocessingContext.allStems.tfByDocument;
        final byte[] stemsFieldIndices = preprocessingContext.allStems.fieldIndices;
        final int titleFieldIndex = findTitleFieldIndex(preprocessingContext.allFields.name);

        // Global weight of every candidate stem, used only to rank the candidates.
        final int[] stemsToInclude = computeRequiredStemIndices(preprocessingContext);
        final double[] stemsWeight = new double[stemsToInclude.length];
        for (int i = 0; i < stemsToInclude.length; i++) {
            final int stemIndex = stemsToInclude[i];
            final int df = stemsTfByDocument[stemIndex].length / 2;
            stemsWeight[i] = this.termWeighting.calculateTermWeight(stemsTf[stemIndex], df, documentCount)
                    * getWeightBoost(titleFieldIndex, stemsFieldIndices[stemIndex]);
        }
        final int[] stemWeightOrder = IndirectSort.mergesort(
                0, stemsWeight.length, new IndirectComparator.DescendingDoubleComparator(stemsWeight));

        // Keep only as many of the top-ranked stems as fit in the configured matrix size.
        final int maxRows = this.maximumMatrixSize / documentCount;
        final int rowCount = Math.min(maxRows, stemsToInclude.length);
        final DenseDoubleMatrix2D tdMatrix = new DenseDoubleMatrix2D(rowCount, documentCount);
        final IntIntHashMap stemToRowIndex = new IntIntHashMap();
        for (int row = 0; row < rowCount; row++) {
            final int stemIndex = stemsToInclude[stemWeightOrder[row]];
            final int[] tfByDocument = stemsTfByDocument[stemIndex];
            final int df = tfByDocument.length / 2;
            final double boost = getWeightBoost(titleFieldIndex, stemsFieldIndices[stemIndex]);
            for (int pair = 0; pair < df; pair++) {
                final int documentIndex = tfByDocument[pair * 2];
                final int tf = tfByDocument[pair * 2 + 1];
                tdMatrix.set(row, documentIndex,
                        this.termWeighting.calculateTermWeight(tf, df, documentCount) * boost);
            }
            stemToRowIndex.put(stemIndex, row);
        }

        vectorSpaceModelContext.termDocumentMatrix = tdMatrix;
        vectorSpaceModelContext.stemToRowIndex = stemToRowIndex;
    }

    /**
     * Builds the term-phrase matrix for the labels starting at
     * {@code allLabels.firstPhraseIndex} and stores the {@code viewDice()} view of the
     * column-normalized result in {@code vectorSpaceModelContext.termPhraseMatrix}.
     *
     * <p>Does nothing when {@code firstPhraseIndex} is negative or when the stem-to-row
     * mapping is empty. Requires {@code vectorSpaceModelContext.stemToRowIndex} to be set,
     * which {@link #buildTermDocumentMatrix(VectorSpaceModelContext)} does.
     *
     * @param vectorSpaceModelContext context providing the preprocessing data and the
     *     stem-to-row mapping, and receiving the result
     */
    public void buildTermPhraseMatrix(VectorSpaceModelContext vectorSpaceModelContext) {
        final PreprocessingContext preprocessingContext = vectorSpaceModelContext.preprocessingContext;
        final IntIntHashMap stemToRowIndex = vectorSpaceModelContext.stemToRowIndex;
        final int[] labelsFeatureIndex = preprocessingContext.allLabels.featureIndex;
        final int firstPhraseIndex = preprocessingContext.allLabels.firstPhraseIndex;
        if (firstPhraseIndex < 0 || stemToRowIndex.size() <= 0) {
            return;
        }

        final int[] phraseFeatureIndices = new int[labelsFeatureIndex.length - firstPhraseIndex];
        System.arraycopy(labelsFeatureIndex, firstPhraseIndex, phraseFeatureIndices, 0, phraseFeatureIndices.length);

        final DoubleMatrix2D phraseMatrix =
                buildAlignedMatrix(vectorSpaceModelContext, phraseFeatureIndices, this.termWeighting);
        MatrixUtils.normalizeColumnL2(phraseMatrix, null);
        vectorSpaceModelContext.termPhraseMatrix = phraseMatrix.viewDice();
    }

    /**
     * Returns the index of the first field named {@value #TITLE_FIELD_NAME}, or
     * {@link #NO_TITLE_FIELD} when there is none.
     */
    private static int findTitleFieldIndex(String[] fieldNames) {
        for (int i = 0; i < fieldNames.length; i++) {
            if (TITLE_FIELD_NAME.equals(fieldNames[i])) {
                return i;
            }
        }
        return NO_TITLE_FIELD;
    }

    /**
     * Returns {@link #titleWordsBoost} when the bit for {@code titleFieldIndex} is set in
     * {@code fieldIndices}, and 1 otherwise.
     *
     * @param titleFieldIndex index of the title field, or a negative value when absent
     * @param fieldIndices bit set of field indices, one bit per field
     */
    private double getWeightBoost(int titleFieldIndex, byte fieldIndices) {
        // Java masks int shift distances to 5 bits, so "1 << -1" is "1 << 31"; combined with
        // sign extension of a negative byte that would wrongly report a title hit. Indices
        // outside the byte's bit range can never be set, so they get no boost.
        if (titleFieldIndex < 0 || titleFieldIndex >= Byte.SIZE) {
            return 1.0d;
        }
        if ((fieldIndices & (1 << titleFieldIndex)) != 0) {
            return this.titleWordsBoost;
        }
        return 1.0d;
    }

    /**
     * Collects the indices of stems that are candidates for term-document matrix rows.
     *
     * <p>A label feature index below the number of words refers to a single word; any other
     * value refers to a phrase at {@code featureIndex - wordCount}, whose non-common words
     * are used. Each word's stem is included only if it passes the {@link #maxWordDf} check.
     *
     * @return the selected stem indices, as produced by the underlying bit set
     */
    private int[] computeRequiredStemIndices(PreprocessingContext preprocessingContext) {
        final int[] labelsFeatureIndex = preprocessingContext.allLabels.featureIndex;
        final int[] wordsStemIndex = preprocessingContext.allWords.stemIndex;
        final short[] wordsType = preprocessingContext.allWords.type;
        final int[][] phrasesWordIndices = preprocessingContext.allPhrases.wordIndices;
        final int wordCount = wordsStemIndex.length;
        final int[][] stemsTfByDocument = preprocessingContext.allStems.tfByDocument;
        final int documentCount = preprocessingContext.documents.size();

        final BitSet requiredStemIndices = new BitSet(labelsFeatureIndex.length);
        for (int featureIndex : labelsFeatureIndex) {
            if (featureIndex < wordCount) {
                addStemIndex(wordsStemIndex, documentCount, stemsTfByDocument, requiredStemIndices, featureIndex);
            } else {
                for (int wordIndex : phrasesWordIndices[featureIndex - wordCount]) {
                    if (!TokenTypeUtils.isCommon(wordsType[wordIndex])) {
                        addStemIndex(wordsStemIndex, documentCount, stemsTfByDocument, requiredStemIndices, wordIndex);
                    }
                }
            }
        }
        return requiredStemIndices.asIntLookupContainer().toArray();
    }

    /**
     * Marks the stem of the given word as required, unless the fraction of documents the
     * stem occurs in exceeds {@link #maxWordDf}.
     *
     * @param wordsStemIndex stem index for each word
     * @param documentCount total number of documents
     * @param stemsTfByDocument per-stem (document index, term frequency) pairs
     * @param requiredStemIndices bit set receiving the accepted stem index
     * @param wordIndex index of the word whose stem is to be considered
     */
    private void addStemIndex(int[] wordsStemIndex, int documentCount, int[][] stemsTfByDocument,
            BitSet requiredStemIndices, int wordIndex) {
        final int stemIndex = wordsStemIndex[wordIndex];
        final int df = stemsTfByDocument[stemIndex].length / 2;
        // The ratio must be computed in floating point: with integer division it collapses
        // to 0 or 1 and the threshold would only reject stems present in every document.
        if ((double) df / documentCount <= this.maxWordDf) {
            requiredStemIndices.set(stemIndex);
        }
    }

    /**
     * Builds a sparse matrix whose rows are aligned with the rows of the term-document
     * matrix (through {@code vectorSpaceModelContext.stemToRowIndex}) and whose columns
     * correspond to the given features.
     *
     * <p>For every feature, each of its words whose stem has a row in the mapping gets the
     * stem's weight, computed from the stem's total term frequency and document frequency.
     * Words whose stem has no row are ignored.
     *
     * @param vectorSpaceModelContext context providing preprocessing data and the
     *     stem-to-row mapping
     * @param featureIndex feature indices; values below the number of words denote single
     *     words, other values denote phrases at {@code value - wordCount}
     * @param termWeighting weighting used to compute cell values
     * @return a matrix with {@code stemToRowIndex.size()} rows and
     *     {@code featureIndex.length} columns; a dense matrix with zero columns when no
     *     features are given
     */
    static DoubleMatrix2D buildAlignedMatrix(VectorSpaceModelContext vectorSpaceModelContext, int[] featureIndex,
            ITermWeighting termWeighting) {
        final IntIntHashMap stemToRowIndex = vectorSpaceModelContext.stemToRowIndex;
        if (featureIndex.length == 0) {
            return new DenseDoubleMatrix2D(stemToRowIndex.size(), 0);
        }

        final SparseDoubleMatrix2D alignedMatrix =
                new SparseDoubleMatrix2D(stemToRowIndex.size(), featureIndex.length);
        final PreprocessingContext preprocessingContext = vectorSpaceModelContext.preprocessingContext;
        final int[] wordsStemIndex = preprocessingContext.allWords.stemIndex;
        final int[] stemsTf = preprocessingContext.allStems.tf;
        final int[][] stemsTfByDocument = preprocessingContext.allStems.tfByDocument;
        final int[][] phrasesWordIndices = preprocessingContext.allPhrases.wordIndices;
        final int documentCount = preprocessingContext.documents.size();
        final int wordCount = wordsStemIndex.length;

        for (int column = 0; column < featureIndex.length; column++) {
            final int feature = featureIndex[column];
            // A single word is handled as a one-element "phrase".
            final int[] wordIndices = feature < wordCount
                    ? new int[] {feature}
                    : phrasesWordIndices[feature - wordCount];
            for (int wordIndex : wordIndices) {
                final int stemIndex = wordsStemIndex[wordIndex];
                final int slot = stemToRowIndex.indexOf(stemIndex);
                if (stemToRowIndex.indexExists(slot)) {
                    final int df = stemsTfByDocument[stemIndex].length / 2;
                    alignedMatrix.setQuick(stemToRowIndex.indexGet(slot), column,
                            termWeighting.calculateTermWeight(stemsTf[stemIndex], df, documentCount));
                }
            }
        }
        return alignedMatrix;
    }
}
