package org.apache.lucene.analysis.ngram;

import com.carrotsearch.randomizedtesting.generators.RandomStrings;
import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.util.TestUtil;

/* loaded from: input_file:org/apache/lucene/analysis/ngram/NGramTokenizerTest.class */
public class NGramTokenizerTest extends BaseTokenStreamTestCase {
    private StringReader input;

    /**
     * Runs the superclass set-up and then points {@code input} at a fresh
     * reader over the fixed sample text {@code "abcde"}.
     */
    public void setUp() throws Exception {
        super.setUp();
        input = new StringReader("abcde");
    }

    /**
     * Constructing a tokenizer with the argument pair (2, 1) must be rejected
     * with an {@link IllegalArgumentException}.
     */
    public void testInvalidInput() throws Exception {
        expectThrows(IllegalArgumentException.class, () -> new NGramTokenizer(2, 1));
    }

    /**
     * Building a tokenizer with the argument pair (0, 1) and attaching the
     * shared reader must raise an {@link IllegalArgumentException}.
     */
    public void testInvalidInput2() throws Exception {
        expectThrows(IllegalArgumentException.class, () -> new NGramTokenizer(0, 1).setReader(input));
    }

    /**
     * With both gram sizes set to 1, the sample text {@code "abcde"} is expected
     * to yield one token per character, each spanning a single char.
     */
    public void testUnigrams() throws Exception {
        final String[] expectedTerms = {"a", "b", "c", "d", "e"};
        final int[] expectedStarts = {0, 1, 2, 3, 4};
        final int[] expectedEnds = {1, 2, 3, 4, 5};

        NGramTokenizer tokenizer = new NGramTokenizer(1, 1);
        tokenizer.setReader(input);
        // The trailing 5 equals the length of the sample text.
        assertTokenStreamContents(tokenizer, expectedTerms, expectedStarts, expectedEnds, 5);
    }

    /**
     * With both gram sizes set to 2, the sample text {@code "abcde"} is expected
     * to yield the four overlapping two-character tokens.
     */
    public void testBigrams() throws Exception {
        final String[] expectedTerms = {"ab", "bc", "cd", "de"};
        final int[] expectedStarts = {0, 1, 2, 3};
        final int[] expectedEnds = {2, 3, 4, 5};

        NGramTokenizer tokenizer = new NGramTokenizer(2, 2);
        tokenizer.setReader(input);
        // The trailing 5 equals the length of the sample text.
        assertTokenStreamContents(tokenizer, expectedTerms, expectedStarts, expectedEnds, 5);
    }

    /**
     * With gram sizes 1 through 3, the sample text {@code "abcde"} is expected to
     * yield, for each start position in turn, the grams of increasing length
     * that still fit inside the text.
     */
    public void testNgrams() throws Exception {
        final String[] expectedTerms = {
            "a", "ab", "abc",
            "b", "bc", "bcd",
            "c", "cd", "cde",
            "d", "de",
            "e"
        };
        final int[] expectedStarts = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4};
        final int[] expectedEnds = {1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 5};

        NGramTokenizer tokenizer = new NGramTokenizer(1, 3);
        tokenizer.setReader(input);
        // NOTE(review): the three nulls, 5 and false are forwarded unchanged; presumably they
        // mean "types / position increments / position lengths not checked", final offset 5
        // and offsetsAreCorrect=false -- confirm against BaseTokenStreamTestCase.
        assertTokenStreamContents(tokenizer, expectedTerms, expectedStarts, expectedEnds, null, null, null, 5, false);
    }

    /**
     * Gram sizes 6 through 7 exceed the five characters of the sample text, so
     * no tokens at all are expected.
     */
    public void testOversizedNgrams() throws Exception {
        final String[] noTerms = new String[0];
        final int[] noStarts = new int[0];
        final int[] noEnds = new int[0];

        NGramTokenizer tokenizer = new NGramTokenizer(6, 7);
        tokenizer.setReader(input);
        // The trailing 5 equals the length of the sample text.
        assertTokenStreamContents(tokenizer, noTerms, noStarts, noEnds, 5);
    }

    /**
     * Consumes the unigram stream once, then attaches a second reader over the
     * same text to the same tokenizer instance and expects identical output.
     */
    public void testReset() throws Exception {
        final String[] expectedTerms = {"a", "b", "c", "d", "e"};
        final int[] expectedStarts = {0, 1, 2, 3, 4};
        final int[] expectedEnds = {1, 2, 3, 4, 5};

        NGramTokenizer tokenizer = new NGramTokenizer(1, 1);

        // First pass over the reader prepared in setUp().
        tokenizer.setReader(input);
        assertTokenStreamContents(tokenizer, expectedTerms, expectedStarts, expectedEnds, 5);

        // Second pass: same tokenizer, brand-new reader.
        tokenizer.setReader(new StringReader("abcde"));
        assertTokenStreamContents(tokenizer, expectedTerms, expectedStarts, expectedEnds, 5);
    }

    /**
     * Runs the random-data checker against analyzers built around an
     * {@link NGramTokenizer} with randomly chosen gram sizes (first size in
     * [2, 10], second size between the first and 20).
     *
     * <p>Ten rounds are executed when {@code TEST_NIGHTLY} is set, otherwise one.
     * Each analyzer is managed by try-with-resources so it is closed even when
     * one of the checks throws.
     */
    public void testRandomStrings() throws Exception {
        final int rounds = TEST_NIGHTLY ? 10 : 1;
        for (int round = 0; round < rounds; round++) {
            final int minGram = TestUtil.nextInt(random(), 2, 10);
            final int maxGram = TestUtil.nextInt(random(), minGram, 20);
            try (Analyzer analyzer = new Analyzer() {
                protected Analyzer.TokenStreamComponents createComponents(String str) {
                    NGramTokenizer tokenizer = new NGramTokenizer(minGram, maxGram);
                    return new Analyzer.TokenStreamComponents(tokenizer, tokenizer);
                }
            }) {
                // Many iterations with a small last argument, then a few with a large one.
                checkRandomData(random(), analyzer, 200 * RANDOM_MULTIPLIER, 20);
                checkRandomData(random(), analyzer, 10 * RANDOM_MULTIPLIER, 1027);
            }
        }
    }

    /**
     * Convenience overload: generates a random ASCII string of length {@code i3}
     * and delegates to the string-based overload.
     *
     * @param i   first gram-size argument, forwarded unchanged
     * @param i2  second gram-size argument, forwarded unchanged
     * @param i3  length of the random ASCII input to generate
     * @param str characters treated as non-token characters, forwarded unchanged
     */
    private static void testNGrams(int i, int i2, int i3, String str) throws IOException {
        final String randomInput = RandomStrings.randomAsciiOfLength(random(), i3);
        testNGrams(i, i2, randomInput, str);
    }

    /**
     * Convenience overload: delegates to the five-argument overload with its
     * boolean flag set to {@code false}.
     *
     * @param i    first gram-size argument, forwarded unchanged
     * @param i2   second gram-size argument, forwarded unchanged
     * @param str  input text to tokenize
     * @param str2 characters treated as non-token characters
     */
    private static void testNGrams(int i, int i2, String str, String str2) throws IOException {
        final boolean edgesOnly = false;
        testNGrams(i, i2, str, str2, edgesOnly);
    }

    /**
     * Decodes a UTF-16 character sequence into its Unicode code points.
     *
     * <p>A valid surrogate pair becomes one supplementary code point; an
     * unpaired surrogate is returned as its own value.
     *
     * @param charSequence the text to decode
     * @return a new array holding one entry per code point, in order
     */
    static int[] toCodePoints(CharSequence charSequence) {
        return charSequence.codePoints().toArray();
    }

    /**
     * Tells whether a code point counts as a token character, i.e. whether it is
     * absent from the given set of non-token characters.
     *
     * <p>The set is compared code point by code point, so a supplementary
     * character in {@code str} only matches the same supplementary code point,
     * never one of its surrogate halves.
     *
     * @param str the non-token characters
     * @param i   the code point to look up
     * @return {@code false} if {@code i} occurs in {@code str}, {@code true} otherwise
     */
    static boolean isTokenChar(String str, int i) {
        return str.codePoints().noneMatch(codePoint -> codePoint == i);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Tokenizes {@code str} with an {@link NGramTokenizer} and checks every
     * emitted token against an independently computed list of expected grams.
     *
     * <p>Expected grams are enumerated over code points: for each start index,
     * every end index from {@code start + i} up to {@code start + i2} that still
     * lies inside the text. A gram is expected only if all of its code points
     * are token characters and, when {@code z} is set, the gram starts at index 0
     * or right after a non-token character. For every expected gram the term
     * text, a position increment and position length of 1, and the start/end
     * char offsets are asserted. After the last gram the stream must be
     * exhausted, and after {@code end()} both offsets must equal
     * {@code str.length()}.
     *
     * <p>The tokenizer is closed when the check finishes, whether or not an
     * assertion failed.
     *
     * @param i    smallest gram length, in code points
     * @param i2   largest gram length, in code points
     * @param str  the text to tokenize
     * @param str2 characters that are not token characters
     * @param z    when {@code true}, only grams starting at an edge are expected
     * @throws IOException if the tokenizer fails while reading or closing
     */
    public static void testNGrams(int i, int i2, String str, final String str2, boolean z) throws IOException {
        final int[] codePoints = toCodePoints(str);

        // offsets[k] is the char offset at which code point k starts;
        // offsets[codePoints.length] is the total length in chars.
        final int[] offsets = new int[codePoints.length + 1];
        for (int k = 0; k < codePoints.length; k++) {
            offsets[k + 1] = offsets[k] + Character.charCount(codePoints[k]);
        }

        try (NGramTokenizer grams = new NGramTokenizer(i, i2, z) {
            protected boolean isTokenChar(int chr) {
                return str2.indexOf(chr) < 0;
            }
        }) {
            grams.setReader(new StringReader(str));
            final CharTermAttribute termAtt = grams.addAttribute(CharTermAttribute.class);
            final PositionIncrementAttribute posIncAtt = grams.addAttribute(PositionIncrementAttribute.class);
            final PositionLengthAttribute posLenAtt = grams.addAttribute(PositionLengthAttribute.class);
            final OffsetAttribute offsetAtt = grams.addAttribute(OffsetAttribute.class);
            grams.reset();

            for (int start = 0; start < codePoints.length; start++) {
                for (int end = start + i; end <= start + i2 && end <= codePoints.length; end++) {
                    if (!isExpectedGram(codePoints, start, end, str2, z)) {
                        continue;
                    }
                    assertTrue(grams.incrementToken());
                    assertArrayEquals(Arrays.copyOfRange(codePoints, start, end), toCodePoints(termAtt));
                    assertEquals(1L, posIncAtt.getPositionIncrement());
                    assertEquals(1L, posLenAtt.getPositionLength());
                    assertEquals(offsets[start], offsetAtt.startOffset());
                    assertEquals(offsets[end], offsetAtt.endOffset());
                }
            }

            assertFalse(grams.incrementToken());
            grams.end();
            assertEquals(str.length(), offsetAtt.startOffset());
            assertEquals(str.length(), offsetAtt.endOffset());
        }
    }

    /**
     * Decides whether the code points in {@code [start, end)} form a gram the
     * tokenizer is expected to emit.
     */
    private static boolean isExpectedGram(int[] codePoints, int start, int end, String nonTokenChars, boolean edgesOnly) {
        // In edges-only mode a gram preceded by a token character is not on an edge.
        if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1])) {
            return false;
        }
        for (int k = start; k < end; k++) {
            if (!isTokenChar(nonTokenChars, codePoints[k])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks random ASCII input of 3072 to 4096 characters with random gram
     * sizes up to 100 and no non-token characters.
     */
    public void testLargeInput() throws IOException {
        final int minGram = TestUtil.nextInt(random(), 1, 100);
        final int maxGram = TestUtil.nextInt(random(), minGram, 100);
        final int inputLength = TestUtil.nextInt(random(), 3072, 4096);
        testNGrams(minGram, maxGram, inputLength, "");
    }

    /**
     * Checks random ASCII input of 3072 to 4096 characters with very large gram
     * sizes (between 1290 and 1300) and no non-token characters.
     */
    public void testLargeMaxGram() throws IOException {
        final int minGram = TestUtil.nextInt(random(), 1290, 1300);
        final int maxGram = TestUtil.nextInt(random(), minGram, 1300);
        final int inputLength = TestUtil.nextInt(random(), 3072, 4096);
        testNGrams(minGram, maxGram, inputLength, "");
    }

    /**
     * Checks random ASCII input of up to 4096 characters with random gram sizes
     * up to 100, treating {@code 'a'} as the only non-token character.
     */
    public void testPreTokenization() throws IOException {
        final int minGram = TestUtil.nextInt(random(), 1, 100);
        final int maxGram = TestUtil.nextInt(random(), minGram, 100);
        final int inputLength = TestUtil.nextInt(random(), 0, 4096);
        testNGrams(minGram, maxGram, inputLength, "a");
    }

    /**
     * Checks random ASCII input of up to 4096 characters with random gram sizes
     * up to 100, treating the letters {@code a} through {@code f} as non-token
     * characters.
     */
    public void testHeavyPreTokenization() throws IOException {
        final int minGram = TestUtil.nextInt(random(), 1, 100);
        final int maxGram = TestUtil.nextInt(random(), minGram, 100);
        final int inputLength = TestUtil.nextInt(random(), 0, 4096);
        testNGrams(minGram, maxGram, inputLength, "abcdef");
    }

    /**
     * Checks input of 4000 to 5000 characters that is mostly spaces, with the
     * space declared a non-token character and gram sizes of 1 or 2.
     */
    public void testFewTokenChars() throws IOException {
        final char[] chars = new char[TestUtil.nextInt(random(), 4000, 5000)];
        for (int k = 0; k < chars.length; k++) {
            // One random draw per position: 'a' when the draw is below 0.1, a space otherwise.
            chars[k] = random().nextFloat() < 0.1d ? 'a' : ' ';
        }
        final int minGram = TestUtil.nextInt(random(), 1, 2);
        final int maxGram = TestUtil.nextInt(random(), minGram, 2);
        testNGrams(minGram, maxGram, new String(chars), " ");
    }

    /**
     * Checks one random Unicode string with random gram sizes up to 100, first
     * with no non-token characters and then with the letters {@code a} through
     * {@code f} as non-token characters.
     */
    public void testFullUTF8Range() throws IOException {
        final int minGram = TestUtil.nextInt(random(), 1, 100);
        final int maxGram = TestUtil.nextInt(random(), minGram, 100);
        // NOTE(review): 4096 presumably bounds the generated string's length -- confirm against TestUtil.
        final String text = TestUtil.randomUnicodeString(random(), 4096);
        for (String nonTokenChars : new String[] {"", "abcdef"}) {
            testNGrams(minGram, maxGram, text, nonTokenChars);
        }
    }
}
