package org.apache.lucene.analysis.pattern;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.automaton.Automaton;

/* loaded from: input_file:org/apache/lucene/analysis/pattern/TestSimplePatternSplitTokenizer.class */
/**
 * Tests for {@link SimplePatternSplitTokenizer}.
 *
 * <p>As exercised below, the tokenizer treats every match of its pattern as a separator and emits
 * the non-empty stretches of text between matches as tokens, with offsets that refer to the
 * original (pre-char-filter) input.
 */
public class TestSimplePatternSplitTokenizer extends BaseTokenStreamTestCase {

    /** A repeated group must swallow both adjacent "foo"s as one separator. */
    public void testGreedy() throws Exception {
        Tokenizer tokenizer = new SimplePatternSplitTokenizer("(foo)+");
        tokenizer.setReader(new StringReader("bar foofoo baz"));
        assertTokenStreamContents(
                tokenizer,
                new String[] {"bar ", " baz"},
                new int[] {0, 10},
                new int[] {4, 14});
    }

    /** Two separator matches directly after one another must not yield an empty token. */
    public void testBackToBack() throws Exception {
        Tokenizer tokenizer = new SimplePatternSplitTokenizer("foo");
        tokenizer.setReader(new StringReader("bar foofoo baz"));
        assertTokenStreamContents(
                tokenizer,
                new String[] {"bar ", " baz"},
                new int[] {0, 10},
                new int[] {4, 14});
    }

    /**
     * The pattern is 100 'a's followed by a 'b'; the input is 200 'a's, so the pattern never
     * matches and the whole input must come back as a single token.
     */
    public void testBigLookahead() throws Exception {
        StringBuilder pattern = new StringBuilder();
        for (int i = 0; i < 100; i++) {
            pattern.append('a');
        }
        pattern.append('b');

        Tokenizer tokenizer = new SimplePatternSplitTokenizer(pattern.toString());
        CharTermAttribute termAtt = tokenizer.getAttribute(CharTermAttribute.class);

        StringBuilder inputBuilder = new StringBuilder();
        for (int i = 0; i < 200; i++) {
            inputBuilder.append('a');
        }
        String input = inputBuilder.toString();

        tokenizer.setReader(new StringReader(input));
        tokenizer.reset();
        assertTrue(tokenizer.incrementToken());
        assertEquals(input, termAtt.toString());
        assertFalse(tokenizer.incrementToken());
    }

    /** A pattern that matches everything leaves nothing to emit. */
    public void testNoTokens() throws Exception {
        Tokenizer tokenizer = new SimplePatternSplitTokenizer(".*");

        // Draw random strings until we get a non-empty one.
        String input = TestUtil.randomUnicodeString(random());
        while (input.isEmpty()) {
            input = TestUtil.randomUnicodeString(random());
        }

        tokenizer.setReader(new StringReader(input));
        tokenizer.reset();
        assertFalse(tokenizer.incrementToken());
    }

    /** "a*" only ever matches the empty string in "bbb", so the input is a single token. */
    public void testEmptyStringPatternNoMatch() throws Exception {
        Tokenizer tokenizer = new SimplePatternSplitTokenizer("a*");
        CharTermAttribute termAtt = tokenizer.getAttribute(CharTermAttribute.class);
        tokenizer.setReader(new StringReader("bbb"));
        tokenizer.reset();
        assertTrue(tokenizer.incrementToken());
        assertEquals("bbb", termAtt.toString());
        assertFalse(tokenizer.incrementToken());
    }

    /** Splitting on exactly one whitespace character; runs of whitespace yield no empty tokens. */
    public void testSplitSingleCharWhitespace() throws Exception {
        Tokenizer tokenizer = new SimplePatternSplitTokenizer("[ \t\r\n]");
        tokenizer.setReader(new StringReader("a \tb   c"));
        assertTokenStreamContents(
                tokenizer,
                new String[] {"a", "b", "c"},
                new int[] {0, 3, 7},
                new int[] {1, 4, 8});
    }

    /** Splitting on zero-or-more whitespace characters gives the same tokens and offsets. */
    public void testSplitMultiCharWhitespace() throws Exception {
        Tokenizer tokenizer = new SimplePatternSplitTokenizer("[ \t\r\n]*");
        tokenizer.setReader(new StringReader("a \tb   c"));
        assertTokenStreamContents(
                tokenizer,
                new String[] {"a", "b", "c"},
                new int[] {0, 3, 7},
                new int[] {1, 4, 8});
    }

    /** Separator text at the very start of the input is skipped. */
    public void testLeadingNonToken() throws Exception {
        Tokenizer tokenizer = new SimplePatternSplitTokenizer("[ \t\r\n]*");
        tokenizer.setReader(new StringReader("    a c"));
        assertTokenStreamContents(
                tokenizer,
                new String[] {"a", "c"},
                new int[] {4, 6},
                new int[] {5, 7});
    }

    /** Separator text at the very end of the input is skipped. */
    public void testTrailingNonToken() throws Exception {
        Tokenizer tokenizer = new SimplePatternSplitTokenizer("[ \t\r\n]*");
        tokenizer.setReader(new StringReader("a c   "));
        assertTokenStreamContents(
                tokenizer,
                new String[] {"a", "c"},
                new int[] {0, 2},
                new int[] {1, 3});
    }

    /** "a*" has one non-empty match in "bbab", which splits the input into "bb" and "b". */
    public void testEmptyStringPatternOneMatch() throws Exception {
        Tokenizer tokenizer = new SimplePatternSplitTokenizer("a*");
        tokenizer.setReader(new StringReader("bbab"));
        assertTokenStreamContents(
                tokenizer,
                new String[] {"bb", "b"},
                new int[] {0, 3},
                new int[] {2, 4});
    }

    /** After {@code end()}, the end offset must equal the full input length. */
    public void testEndOffset() throws Exception {
        Tokenizer tokenizer = new SimplePatternSplitTokenizer("a+");
        CharTermAttribute termAtt = tokenizer.getAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAtt = tokenizer.getAttribute(OffsetAttribute.class);
        tokenizer.setReader(new StringReader("aaabbb"));
        tokenizer.reset();
        assertTrue(tokenizer.incrementToken());
        assertEquals("bbb", termAtt.toString());
        assertFalse(tokenizer.incrementToken());
        tokenizer.end();
        // "aaabbb" is six characters long.
        assertEquals(6, offsetAtt.endOffset());
    }

    /**
     * A fixed four-character separator applied to fifteen 'a's: three separators consume the first
     * twelve characters and the remaining three form the only token.
     */
    public void testFixedToken() throws Exception {
        Tokenizer tokenizer = new SimplePatternSplitTokenizer("aaaa");
        tokenizer.setReader(new StringReader("aaaaaaaaaaaaaaa"));
        assertTokenStreamContents(
                tokenizer,
                new String[] {"aaa"},
                new int[] {12},
                new int[] {15});
    }

    /** Table-driven check of a few simple literal separators. */
    public void testBasic() throws Exception {
        // Each row: { pattern, input, expected tokens joined by a single space }.
        String[][] cases = {
            {"--", "aaa--bbb--ccc", "aaa bbb ccc"},
            {":", "aaa:bbb:ccc", "aaa bbb ccc"},
            {":", "boo:and:foo", "boo and foo"},
            {"o", "boo:and:foo", "b :and:f"},
        };
        for (String[] testCase : cases) {
            String pattern = testCase[0];
            String input = testCase[1];
            String expected = testCase[2];

            Tokenizer tokenizer = new SimplePatternSplitTokenizer(pattern);
            tokenizer.setReader(new StringReader(input));
            assertEquals(
                    "pattern: " + pattern + " with input: " + input,
                    expected,
                    tsToString(tokenizer));
        }
    }

    /** Constructing the tokenizer from a non-deterministic automaton must be rejected. */
    public void testNotDeterminized() throws Exception {
        Automaton automaton = new Automaton();
        int start = automaton.createState();
        int middleA = automaton.createState();
        int middleB = automaton.createState();
        int accept = automaton.createState();
        automaton.setAccept(accept, true);
        // Two transitions out of the start state over the same label range [a-z] are what make
        // this automaton non-deterministic.
        automaton.addTransition(start, middleA, 'a', 'z');
        automaton.addTransition(start, middleB, 'a', 'z');
        automaton.addTransition(middleA, accept, 'b');
        automaton.addTransition(middleB, accept, 'b');
        expectThrows(IllegalArgumentException.class, () -> {
            new SimplePatternSplitTokenizer(automaton);
        });
    }

    /**
     * Offsets must be reported against the raw input even though a {@link MappingCharFilter}
     * rewrites the six-character entity "&amp;uuml;" to the single character "ü" before the
     * tokenizer sees it.
     */
    public void testOffsetCorrection() throws Exception {
        String input = "G&uuml;nther G&uuml;nther is here";

        NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
        builder.add("&uuml;", "ü");
        MappingCharFilter charFilter = new MappingCharFilter(builder.build(), new StringReader(input));

        Tokenizer tokenizer = new SimplePatternSplitTokenizer("Günther");
        tokenizer.setReader(charFilter);
        // In the raw input each "G&uuml;nther" spans 12 characters, so the single space sits at
        // [12, 13) and " is here" at [25, 33).
        assertTokenStreamContents(
                tokenizer,
                new String[] {" ", " is here"},
                new int[] {12, 25},
                new int[] {13, 33},
                input.length());
    }

    /**
     * Consumes the given stream and returns its terms joined by single spaces. The stream is reset
     * before and closed after consumption.
     *
     * @param tokenStream the stream to drain; must already have its reader set
     * @return all terms in order, separated by ' '
     * @throws IOException if the stream fails while being consumed
     */
    private static String tsToString(TokenStream tokenStream) throws IOException {
        StringBuilder joined = new StringBuilder();
        CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
        // Seed the term with junk before every incrementToken() call; presumably this is meant to
        // expose a tokenizer that forgets to overwrite the term attribute.
        tokenStream.clearAttributes();
        termAtt.setEmpty().append("bogusTerm");
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            if (joined.length() > 0) {
                joined.append(' ');
            }
            joined.append(termAtt.toString());
            tokenStream.clearAttributes();
            termAtt.setEmpty().append("bogusTerm");
        }
        tokenStream.close();
        return joined.toString();
    }

    /** Creates an analyzer whose only component is a tokenizer that splits on the literal "a". */
    private static Analyzer newSplitOnAAnalyzer() {
        return new Analyzer() {
            @Override
            protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
                return new Analyzer.TokenStreamComponents(new SimplePatternSplitTokenizer("a"));
            }
        };
    }

    /** Blasts random text through the tokenizer, in two independent passes with fresh analyzers. */
    public void testRandomStrings() throws Exception {
        for (int pass = 0; pass < 2; pass++) {
            // try-with-resources so the analyzer is closed even when the random check fails.
            try (Analyzer analyzer = newSplitOnAAnalyzer()) {
                checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
            }
        }
    }

    /**
     * The input ends in the middle of a potential further match ("ab" + "a"): the trailing "a"
     * must be emitted as a token and the final offset must be the input length.
     */
    public void testEndLookahead() throws Exception {
        Tokenizer tokenizer = new SimplePatternSplitTokenizer("(ab)+");
        tokenizer.setReader(new StringReader("aba"));
        assertTokenStreamContents(
                tokenizer,
                new String[] {"a"},
                new int[] {2},
                new int[] {3},
                3);
    }
}
