package org.apache.lucene.analysis.miscellaneous;

import java.io.Closeable;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.IOUtils;
import org.junit.Test;

/* loaded from: input_file:org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.class */
public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {

    /* loaded from: input_file:org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter$LargePosIncTokenFilter.class */
    /**
     * Test-only filter that forwards every token from its input unchanged, except that a token
     * whose term text is exactly {@code "largegap"} or {@code "/"} gets its position increment
     * overwritten with 10.
     */
    private static final class LargePosIncTokenFilter extends TokenFilter {
        private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
        private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);

        protected LargePosIncTokenFilter(TokenStream tokenStream) {
            super(tokenStream);
        }

        /**
         * Advances the wrapped stream by one token.
         *
         * @return {@code false} once the wrapped stream is exhausted, {@code true} otherwise
         */
        public boolean incrementToken() throws IOException {
            if (input.incrementToken()) {
                final String term = termAtt.toString();
                if ("largegap".equals(term) || "/".equals(term)) {
                    // Marker terms: force a large gap in front of this token.
                    posIncAtt.setPositionIncrement(10);
                }
                return true;
            }
            return false;
        }
    }

    /**
     * Checks the terms and start/end offsets emitted for a single canned {@code "foo-bar"} token,
     * first when the token's offsets (5..12) span exactly the term length, then when they (5..6)
     * are shorter than the term text.
     */
    @Test
    public void testOffsets() throws IOException {
        // NOTE(review): 467 looks like a bitmask of WordDelimiterFilter option flags — confirm
        // against the filter's flag constants.
        final int flags = 467;
        final CharArraySet protectedWords = null;

        WordDelimiterFilter matchingOffsets = new WordDelimiterFilter(
                new CannedTokenStream(new Token[]{new Token("foo-bar", 5, 12)}),
                WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE,
                flags,
                protectedWords);
        assertTokenStreamContents(
                matchingOffsets,
                new String[]{"foo", "foobar", "bar"},
                new int[]{5, 5, 9},
                new int[]{8, 12, 12});

        // Offsets that do not match the term length: every emitted part is expected to carry
        // the original token's offsets (5..6).
        WordDelimiterFilter mismatchedOffsets = new WordDelimiterFilter(
                new CannedTokenStream(new Token[]{new Token("foo-bar", 5, 6)}),
                WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE,
                flags,
                protectedWords);
        assertTokenStreamContents(
                mismatchedOffsets,
                new String[]{"foo", "bar", "foobar"},
                new int[]{5, 5, 5},
                new int[]{6, 6, 6});
    }

    /**
     * A token {@code "übelkeit)"} at offsets 7..16 is expected to yield the single term
     * {@code "übelkeit"} at offsets 7..15, i.e. the end offset moves in by one for the dropped
     * trailing parenthesis.
     */
    @Test
    public void testOffsetChange() throws Exception {
        // NOTE(review): 467 looks like a bitmask of WordDelimiterFilter option flags — confirm.
        final int flags = 467;
        final CharArraySet protectedWords = null;
        final Token[] tokens = {new Token("übelkeit)", 7, 16)};

        WordDelimiterFilter filter = new WordDelimiterFilter(
                new CannedTokenStream(tokens),
                WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE,
                flags,
                protectedWords);
        assertTokenStreamContents(filter, new String[]{"übelkeit"}, new int[]{7}, new int[]{15});
    }

    /**
     * A token {@code "(übelkeit"} at offsets 7..17 is expected to yield the single term
     * {@code "übelkeit"} at offsets 8..17: the start offset moves past the leading parenthesis
     * while the end offset is kept.
     */
    @Test
    public void testOffsetChange2() throws Exception {
        // NOTE(review): 467 looks like a bitmask of WordDelimiterFilter option flags — confirm.
        final int flags = 467;
        final CharArraySet protectedWords = null;
        final Token[] tokens = {new Token("(übelkeit", 7, 17)};

        WordDelimiterFilter filter = new WordDelimiterFilter(
                new CannedTokenStream(tokens),
                WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE,
                flags,
                protectedWords);
        assertTokenStreamContents(filter, new String[]{"übelkeit"}, new int[]{8}, new int[]{17});
    }

    /**
     * A token {@code "(übelkeit"} at offsets 7..16 is expected to yield the single term
     * {@code "übelkeit"} at offsets 8..16: the start offset moves past the leading parenthesis.
     */
    @Test
    public void testOffsetChange3() throws Exception {
        // NOTE(review): 467 looks like a bitmask of WordDelimiterFilter option flags — confirm.
        final int flags = 467;
        final CharArraySet protectedWords = null;
        final Token[] tokens = {new Token("(übelkeit", 7, 16)};

        WordDelimiterFilter filter = new WordDelimiterFilter(
                new CannedTokenStream(tokens),
                WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE,
                flags,
                protectedWords);
        assertTokenStreamContents(filter, new String[]{"übelkeit"}, new int[]{8}, new int[]{16});
    }

    /**
     * A token {@code "(foo,bar)"} at offsets 7..16 is expected to yield {@code "foo"},
     * {@code "foobar"} and {@code "bar"}, with offsets that exclude the surrounding parentheses
     * (starts 8, 8, 12 and ends 11, 15, 15).
     */
    @Test
    public void testOffsetChange4() throws Exception {
        // NOTE(review): 467 looks like a bitmask of WordDelimiterFilter option flags — confirm.
        final int flags = 467;
        final CharArraySet protectedWords = null;
        final Token[] tokens = {new Token("(foo,bar)", 7, 16)};

        WordDelimiterFilter filter = new WordDelimiterFilter(
                new CannedTokenStream(tokens),
                WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE,
                flags,
                protectedWords);
        assertTokenStreamContents(
                filter,
                new String[]{"foo", "foobar", "bar"},
                new int[]{8, 8, 12},
                new int[]{11, 15, 15});
    }

    /**
     * Runs {@code str} as a single keyword token through a {@link WordDelimiterFilter} built with
     * the default delimiter table and asserts that exactly the terms in {@code strArr} come out.
     *
     * @param str input text, tokenized as one keyword token
     * @param strArr expected output terms, in order
     */
    public void doSplit(String str, String... strArr) throws Exception {
        // NOTE(review): 451 looks like a bitmask of WordDelimiterFilter option flags — confirm.
        final int flags = 451;
        WordDelimiterFilter filter = new WordDelimiterFilter(
                keywordMockTokenizer(str),
                WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE,
                flags,
                (CharArraySet) null);
        assertTokenStreamContents(filter, strArr);
    }

    /**
     * Exercises {@link #doSplit} on ASCII and non-ASCII inputs, checking where the filter does and
     * does not break a single keyword token into parts.
     */
    @Test
    public void testSplits() throws Exception {
        doSplit("basic-split", "basic", "split");
        doSplit("camelCase", "camel", "Case");
        // Thai text containing a combining mark: expected back as one term.
        doSplit("บ้าน", "บ้าน");
        // Possessive followed by a trailing delimiter: only the stem is expected.
        doSplit("test's'", "test");
        // Cyrillic with a single leading capital: no split expected.
        doSplit("Роберт", "Роберт");
        // Cyrillic with an inner capital: expected to split at the case change.
        doSplit("РобЕрт", "Роб", "Ерт");
        // Contains U+01C5, a titlecase letter: no split expected.
        doSplit("aǅungla", "aǅungla");
        // Arabic word stretched with tatweel characters: no split expected.
        doSplit("ســـــــــــــــــلام", "ســـــــــــــــــلام");
        // Trailing enclosing mark (U+20DD): no split expected.
        doSplit("test⃝", "test⃝");
        // Devanagari text with combining marks: no split expected.
        doSplit("हिन्दी", "हिन्दी");
        // Arabic-Indic digits: no split expected.
        doSplit("١٢٣٤", "١٢٣٤");
        // Supplementary (non-BMP) characters must come back intact rather than being split into
        // unpaired surrogates. This literal had been mangled into four U+FFFD replacement
        // characters, so the case no longer exercised surrogate pairs at all. It is written with
        // escapes so it survives source re-encoding.
        // NOTE(review): restored as two U+20000 code points — confirm against the upstream test
        // if the exact character matters.
        doSplit("\uD840\uDC00\uD840\uDC00", "\uD840\uDC00\uD840\uDC00");
    }

    /**
     * Runs {@code str} as a single keyword token through a {@link WordDelimiterFilter} and asserts
     * that exactly the terms in {@code strArr} come out.
     *
     * @param i when equal to 1, the extra flag bit 256 is OR-ed into the base flags 195; any other
     *     value leaves the base flags unchanged
     * @param str input text, tokenized as one keyword token
     * @param strArr expected output terms, in order
     */
    public void doSplitPossessive(int i, String str, String... strArr) throws Exception {
        // NOTE(review): 195 and 256 look like WordDelimiterFilter option flag bits; judging by the
        // callers, 256 presumably enables possessive stemming — confirm.
        int flags = 195;
        if (i == 1) {
            flags |= 256;
        }
        WordDelimiterFilter filter =
                new WordDelimiterFilter(keywordMockTokenizer(str), flags, (CharArraySet) null);
        assertTokenStreamContents(filter, strArr);
    }

    /**
     * Checks both modes of {@link #doSplitPossessive} on {@code "ra's"}: with mode 1 only
     * {@code "ra"} is expected, with mode 0 both {@code "ra"} and {@code "s"} are expected.
     */
    @Test
    public void testPossessives() throws Exception {
        final String possessive = "ra's";
        // Mode 1: the trailing 's is expected to be dropped.
        doSplitPossessive(1, possessive, "ra");
        // Mode 0: the trailing s is expected as its own term.
        doSplitPossessive(0, possessive, "ra", "s");
    }

    /**
     * Verifies terms, offsets and position increments produced by {@link WordDelimiterFilter} in
     * three analysis chains: directly after a whitespace tokenizer, after
     * {@link LargePosIncTokenFilter} (which sets large increments on some tokens), and after a
     * {@link StopFilter} (which removes tokens in front of it).
     *
     * <p>The analyzers are declared as {@link Analyzer}, the type {@code assertAnalyzesTo} is
     * called with elsewhere in this class, and are closed in a {@code finally} block so a failing
     * assertion does not leave them open.
     */
    @Test
    public void testPositionIncrements() throws Exception {
        // NOTE(review): 467 looks like a bitmask of WordDelimiterFilter option flags — confirm.
        final int flags = 467;
        // "NUTCH" is handed to the filter as its CharArraySet argument (case-sensitive set).
        final CharArraySet protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("NUTCH")), false);

        // Chain 1: whitespace tokenizer -> WordDelimiterFilter.
        Analyzer plain = new Analyzer() {
            public Analyzer.TokenStreamComponents createComponents(String fieldName) {
                MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                return new Analyzer.TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
            }
        };
        // Chain 2: whitespace tokenizer -> LargePosIncTokenFilter -> WordDelimiterFilter.
        Analyzer withLargeGaps = new Analyzer() {
            public Analyzer.TokenStreamComponents createComponents(String fieldName) {
                MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                return new Analyzer.TokenStreamComponents(tokenizer, new WordDelimiterFilter(new LargePosIncTokenFilter(tokenizer), flags, protectedWords));
            }
        };
        // Chain 3: whitespace tokenizer -> StopFilter -> WordDelimiterFilter.
        Analyzer withStopWords = new Analyzer() {
            public Analyzer.TokenStreamComponents createComponents(String fieldName) {
                MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                return new Analyzer.TokenStreamComponents(tokenizer, new WordDelimiterFilter(new StopFilter(tokenizer, StandardAnalyzer.STOP_WORDS_SET), flags, protectedWords));
            }
        };

        try {
            // Chain 1: the "/" token yields no output and the following term is expected with
            // increment 1.
            assertAnalyzesTo(plain, "LUCENE / SOLR", new String[]{"LUCENE", "SOLR"}, new int[]{0, 9}, new int[]{6, 13}, null, new int[]{1, 1}, null, false);
            assertAnalyzesTo(plain, "LUCENE / solR", new String[]{"LUCENE", "sol", "solR", "R"}, new int[]{0, 9, 9, 12}, new int[]{6, 12, 13, 13}, null, new int[]{1, 1, 0, 1}, null, false);
            assertAnalyzesTo(plain, "LUCENE / NUTCH SOLR", new String[]{"LUCENE", "NUTCH", "SOLR"}, new int[]{0, 9, 15}, new int[]{6, 14, 19}, null, new int[]{1, 1, 1}, null, false);

            // Chain 2: the increment of 10 set on "largegap" and "/" is expected to survive; for
            // the dropped "/" it shows up on the next emitted term as 11.
            assertAnalyzesTo(withLargeGaps, "LUCENE largegap SOLR", new String[]{"LUCENE", "largegap", "SOLR"}, new int[]{0, 7, 16}, new int[]{6, 15, 20}, null, new int[]{1, 10, 1}, null, false);
            assertAnalyzesTo(withLargeGaps, "LUCENE / SOLR", new String[]{"LUCENE", "SOLR"}, new int[]{0, 9}, new int[]{6, 13}, null, new int[]{1, 11}, null, false);
            assertAnalyzesTo(withLargeGaps, "LUCENE / solR", new String[]{"LUCENE", "sol", "solR", "R"}, new int[]{0, 9, 9, 12}, new int[]{6, 12, 13, 13}, null, new int[]{1, 11, 0, 1}, null, false);
            assertAnalyzesTo(withLargeGaps, "LUCENE / NUTCH SOLR", new String[]{"LUCENE", "NUTCH", "SOLR"}, new int[]{0, 9, 15}, new int[]{6, 14, 19}, null, new int[]{1, 11, 1}, null, false);

            // Chain 3: a removed leading "the" is expected to show up as increment 2 on the first
            // emitted term.
            assertAnalyzesTo(withStopWords, "lucene.solr", new String[]{"lucene", "lucenesolr", "solr"}, new int[]{0, 0, 7}, new int[]{6, 11, 11}, null, new int[]{1, 0, 1}, null, false);
            assertAnalyzesTo(withStopWords, "the lucene.solr", new String[]{"lucene", "lucenesolr", "solr"}, new int[]{4, 4, 11}, new int[]{10, 15, 15}, null, new int[]{2, 0, 1}, null, false);
        } finally {
            IOUtils.close(plain, withLargeGaps, withStopWords);
        }
    }

    /**
     * Analyzes {@code "abc-def-123-456"} with flags 479 and checks the expected parts and
     * concatenations together with their offsets and position increments.
     *
     * <p>The analyzer is closed in a {@code finally} block so it is released even when the
     * assertion fails.
     */
    public void testLotsOfConcatenating() throws Exception {
        // NOTE(review): 479 looks like a bitmask of WordDelimiterFilter option flags — confirm.
        final int flags = 479;
        Analyzer analyzer = new Analyzer() {
            public Analyzer.TokenStreamComponents createComponents(String fieldName) {
                MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                return new Analyzer.TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, (CharArraySet) null));
            }
        };
        try {
            assertAnalyzesTo(
                    analyzer,
                    "abc-def-123-456",
                    new String[]{"abc", "abcdef", "abcdef123456", "def", "123", "123456", "456"},
                    new int[]{0, 0, 0, 4, 8, 8, 12},
                    new int[]{3, 7, 15, 7, 11, 15, 15},
                    null,
                    new int[]{1, 0, 0, 1, 1, 0, 1},
                    null,
                    false);
        } finally {
            analyzer.close();
        }
    }

    /**
     * Analyzes {@code "abc-def-123-456"} with flags 511 and checks that the unmodified input term
     * is emitted first, followed by the expected parts and concatenations, together with their
     * offsets and position increments.
     *
     * <p>The analyzer is closed in a {@code finally} block so it is released even when the
     * assertion fails.
     */
    public void testLotsOfConcatenating2() throws Exception {
        // NOTE(review): 511 (all nine low bits set) looks like every WordDelimiterFilter option
        // flag enabled — confirm.
        final int flags = 511;
        Analyzer analyzer = new Analyzer() {
            public Analyzer.TokenStreamComponents createComponents(String fieldName) {
                MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                return new Analyzer.TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, (CharArraySet) null));
            }
        };
        try {
            assertAnalyzesTo(
                    analyzer,
                    "abc-def-123-456",
                    new String[]{"abc-def-123-456", "abc", "abcdef", "abcdef123456", "def", "123", "123456", "456"},
                    new int[]{0, 0, 0, 0, 4, 8, 8, 12},
                    new int[]{15, 3, 7, 15, 7, 11, 15, 15},
                    null,
                    new int[]{1, 0, 0, 0, 1, 1, 0, 1},
                    null,
                    false);
        } finally {
            analyzer.close();
        }
    }

    /**
     * Feeds random text through {@link WordDelimiterFilter} using a random flag value in
     * [0, 512) and, at random, either a small protected-word set or none.
     *
     * <p>Each iteration's analyzer is closed in a {@code finally} block so it is released even
     * when the random-data check fails.
     */
    public void testRandomStrings() throws Exception {
        int iterations = atLeast(5);
        for (int iter = 0; iter < iterations; iter++) {
            // Draw order (flags first, then the protected-words coin flip) is kept as-is so a
            // given seed reproduces the same configuration.
            final int flags = random().nextInt(512);
            final CharArraySet protectedWords = random().nextBoolean()
                    ? new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false)
                    : null;
            Analyzer analyzer = new Analyzer() {
                protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
                    MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                    return new Analyzer.TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
                }
            };
            try {
                checkRandomData(random(), analyzer, 200 * RANDOM_MULTIPLIER, 20, false, false);
            } finally {
                analyzer.close();
            }
        }
    }

    /**
     * Same setup as the random-strings test but with fewer iterations
     * ({@code 20 * RANDOM_MULTIPLIER}) and a much larger size argument (8192) passed to
     * {@code checkRandomData}.
     *
     * <p>Each iteration's analyzer is closed in a {@code finally} block so it is released even
     * when the random-data check fails.
     */
    public void testRandomHugeStrings() throws Exception {
        int iterations = atLeast(5);
        for (int iter = 0; iter < iterations; iter++) {
            // Draw order (flags first, then the protected-words coin flip) is kept as-is so a
            // given seed reproduces the same configuration.
            final int flags = random().nextInt(512);
            final CharArraySet protectedWords = random().nextBoolean()
                    ? new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false)
                    : null;
            Analyzer analyzer = new Analyzer() {
                protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
                    MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                    return new Analyzer.TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
                }
            };
            try {
                checkRandomData(random(), analyzer, 20 * RANDOM_MULTIPLIER, 8192, false, false);
            } finally {
                analyzer.close();
            }
        }
    }

    /**
     * Runs the empty string through {@link WordDelimiterFilter} behind a {@link KeywordTokenizer}
     * for every flag value in [0, 512), with a randomly chosen protected-word set or none.
     *
     * <p>Each iteration's analyzer is closed in a {@code finally} block so it is released even
     * when the consistency check fails.
     */
    public void testEmptyTerm() throws IOException {
        Random random = random();
        for (int flagValue = 0; flagValue < 512; flagValue++) {
            final int flags = flagValue;
            final CharArraySet protectedWords = random.nextBoolean()
                    ? new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false)
                    : null;
            Analyzer analyzer = new Analyzer() {
                protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
                    KeywordTokenizer tokenizer = new KeywordTokenizer();
                    return new Analyzer.TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
                }
            };
            try {
                checkAnalysisConsistency(random, analyzer, random.nextBoolean(), "");
            } finally {
                analyzer.close();
            }
        }
    }

    /**
     * With flags 193, the purely numeric input {@code "7-586"} is expected to produce no tokens
     * at all.
     *
     * <p>The analyzer used to be created inline and never closed; it is now held in a local and
     * closed in a {@code finally} block, matching the other tests in this class.
     */
    public void testOnlyNumbers() throws Exception {
        // NOTE(review): 193 looks like a bitmask of WordDelimiterFilter option flags — confirm.
        final int flags = 193;
        Analyzer analyzer = new Analyzer() {
            protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
                MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                return new Analyzer.TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, (CharArraySet) null));
            }
        };
        try {
            assertAnalyzesTo(analyzer, "7-586", new String[0], new int[0], new int[0], null, new int[0], null, false);
        } finally {
            analyzer.close();
        }
    }

    /**
     * With flags 193, the input {@code "6-"} is expected to produce the single term {@code "6"}
     * at offsets 0..1 with position increment 1.
     *
     * <p>The analyzer used to be created inline and never closed; it is now held in a local and
     * closed in a {@code finally} block, matching the other tests in this class.
     */
    public void testNumberPunct() throws Exception {
        // NOTE(review): 193 looks like a bitmask of WordDelimiterFilter option flags — confirm.
        final int flags = 193;
        Analyzer analyzer = new Analyzer() {
            protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
                MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                return new Analyzer.TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, (CharArraySet) null));
            }
        };
        try {
            assertAnalyzesTo(analyzer, "6-", new String[]{"6"}, new int[]{0}, new int[]{1}, null, new int[]{1}, null, false);
        } finally {
            analyzer.close();
        }
    }

    /**
     * Builds an analyzer whose chain is a whitespace {@link MockTokenizer} followed by a
     * {@link WordDelimiterFilter} configured with the given flags and no protected-word set.
     *
     * @param i flag value passed straight to the {@link WordDelimiterFilter} constructor
     * @return a new analyzer; the caller is responsible for closing it
     */
    private Analyzer getAnalyzer(final int i) {
        Analyzer analyzer = new Analyzer() {
            protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
                MockTokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                WordDelimiterFilter delimited = new WordDelimiterFilter(source, i, (CharArraySet) null);
                return new Analyzer.TokenStreamComponents(source, delimited);
            }
        };
        return analyzer;
    }
}
