package org.apache.lucene.analysis.custom;

import java.io.IOException;
import java.io.Reader;
import java.nio.file.Paths;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory;
import org.apache.lucene.analysis.charfilter.MappingCharFilterFactory;
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.apache.lucene.analysis.core.StopFilterFactory;
import org.apache.lucene.analysis.core.WhitespaceTokenizerFactory;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory;
import org.apache.lucene.analysis.standard.ClassicTokenizerFactory;
import org.apache.lucene.analysis.standard.StandardTokenizerFactory;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SetOnce;
import org.apache.lucene.util.Version;

/* loaded from: input_file:org/apache/lucene/analysis/custom/TestCustomAnalyzer.class */
public class TestCustomAnalyzer extends BaseTokenStreamTestCase {

    /**
     * Char filter used by the normalization tests: every occurrence of one
     * character in the wrapped reader is replaced by another character.
     */
    private static class DummyCharFilter extends CharFilter {
        /** Character to look for in the text that was read. */
        private final char match;
        /** Character written in place of {@link #match}. */
        private final char repl;

        /**
         * @param in          reader whose content is filtered
         * @param target      character to replace
         * @param replacement character that takes the place of {@code target}
         */
        public DummyCharFilter(Reader in, char target, char replacement) {
            super(in);
            match = target;
            repl = replacement;
        }

        /**
         * Returns the offset unchanged; the replacement is one character for one
         * character, so positions in the filtered text equal positions in the input.
         */
        protected int correct(int currentOff) {
            return currentOff;
        }

        /**
         * Reads from the wrapped reader and rewrites matching characters in place.
         *
         * @return whatever the wrapped reader's {@code read} returned
         */
        public int read(char[] cbuf, int off, int len) throws IOException {
            final int count = this.input.read(cbuf, off, len);
            // When nothing was read (count <= 0) the end index does not exceed
            // the start index and the loop body never runs.
            final int end = off + count;
            for (int pos = off; pos < end; pos++) {
                if (cbuf[pos] == match) {
                    cbuf[pos] = repl;
                }
            }
            return count;
        }
    }

    /**
     * Factory for {@link DummyCharFilter}. The public constructor configures the
     * filter to replace {@code '0'} with {@code '1'}; the package-private one
     * lets other test factories pick a different pair.
     */
    public static class DummyCharFilterFactory extends CharFilterFactory {
        /** Character the created filters look for. */
        private final char match;
        /** Character the created filters substitute. */
        private final char repl;

        /**
         * Creates a factory whose filters turn {@code '0'} into {@code '1'}.
         *
         * @param args factory arguments, handed to the super constructor
         */
        public DummyCharFilterFactory(Map<String, String> args) {
            this(args, '0', '1');
        }

        /**
         * @param args        factory arguments, handed to the super constructor
         * @param target      character to replace
         * @param replacement character that takes the place of {@code target}
         */
        DummyCharFilterFactory(Map<String, String> args, char target, char replacement) {
            super(args);
            match = target;
            repl = replacement;
        }

        /** Wraps {@code in} in a {@link DummyCharFilter} using this factory's character pair. */
        public Reader create(Reader in) {
            final Reader filtered = new DummyCharFilter(in, match, repl);
            return filtered;
        }
    }

    /**
     * Variant of {@link DummyCharFilterFactory} that also implements
     * {@link MultiTermAwareComponent}. Its multi-term component replaces
     * {@code '0'} with {@code '2'} instead of {@code '1'}.
     */
    public static class DummyMultiTermAwareCharFilterFactory extends DummyCharFilterFactory implements MultiTermAwareComponent {
        /**
         * @param args factory arguments, handed to the super constructor
         */
        public DummyMultiTermAwareCharFilterFactory(Map<String, String> args) {
            super(args);
        }

        /** Returns a new {@link DummyCharFilterFactory} configured with an empty argument map and the pair {@code '0' -> '2'}. */
        public AbstractAnalysisFactory getMultiTermComponent() {
            final char target = '0';
            final char replacement = '2';
            return new DummyCharFilterFactory(Collections.emptyMap(), target, replacement);
        }
    }

    /**
     * Variant of {@link DummyTokenFilterFactory} that also implements
     * {@link MultiTermAwareComponent}, exposing an ASCII folding filter factory
     * as its multi-term component.
     */
    public static class DummyMultiTermAwareTokenFilterFactory extends DummyTokenFilterFactory implements MultiTermAwareComponent {
        /**
         * @param args factory arguments, handed to the super constructor
         */
        public DummyMultiTermAwareTokenFilterFactory(Map<String, String> args) {
            super(args);
        }

        /** Returns a new {@link ASCIIFoldingFilterFactory} created from an empty argument map. */
        public AbstractAnalysisFactory getMultiTermComponent() {
            final ASCIIFoldingFilterFactory folding = new ASCIIFoldingFilterFactory(Collections.emptyMap());
            return folding;
        }
    }

    /**
     * Variant of {@link DummyTokenizerFactory} that also implements
     * {@link MultiTermAwareComponent}, exposing a keyword tokenizer factory as
     * its multi-term component.
     */
    public static class DummyMultiTermAwareTokenizerFactory extends DummyTokenizerFactory implements MultiTermAwareComponent {
        /**
         * @param args factory arguments, handed to the super constructor
         */
        public DummyMultiTermAwareTokenizerFactory(Map<String, String> args) {
            super(args);
        }

        /** Returns a new {@link KeywordTokenizerFactory} built from this factory's original arguments. */
        public AbstractAnalysisFactory getMultiTermComponent() {
            final Map<String, String> sameArgs = getOriginalArgs();
            return new KeywordTokenizerFactory(sameArgs);
        }
    }

    /**
     * Token filter factory that adds no filtering: {@link #create(TokenStream)}
     * hands back the stream it was given. It does not implement
     * {@link MultiTermAwareComponent}.
     */
    public static class DummyTokenFilterFactory extends TokenFilterFactory {
        /**
         * @param args factory arguments, handed to the super constructor
         */
        public DummyTokenFilterFactory(Map<String, String> args) {
            super(args);
        }

        /** Returns {@code upstream} itself, without wrapping it. */
        public TokenStream create(TokenStream upstream) {
            return upstream;
        }
    }

    /**
     * Tokenizer factory that produces {@link LowerCaseTokenizer} instances. It
     * does not implement {@link MultiTermAwareComponent}.
     */
    public static class DummyTokenizerFactory extends TokenizerFactory {
        /**
         * @param args factory arguments, handed to the super constructor
         */
        public DummyTokenizerFactory(Map<String, String> args) {
            super(args);
        }

        /** Creates a new {@link LowerCaseTokenizer} backed by the given attribute factory. */
        public Tokenizer create(AttributeFactory attributes) {
            final Tokenizer lowerCasing = new LowerCaseTokenizer(attributes);
            return lowerCasing;
        }
    }

    /**
     * Builds an analyzer from factory classes (whitespace tokenizer, ASCII
     * folding with {@code preserveOriginal=true}, lower-casing) and checks the
     * reported configuration as well as the produced tokens and position
     * increments.
     */
    public void testWhitespaceFactoryWithFolding() throws Exception {
        // try-with-resources closes the analyzer even when an assertion fails.
        try (CustomAnalyzer analyzer = CustomAnalyzer.builder()
                .withTokenizer(WhitespaceTokenizerFactory.class)
                .addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "true")
                .addTokenFilter(LowerCaseFilterFactory.class)
                .build()) {
            // Reported components must mirror what was passed to the builder.
            assertSame(WhitespaceTokenizerFactory.class, analyzer.getTokenizerFactory().getClass());
            assertEquals(Collections.emptyList(), analyzer.getCharFilterFactories());
            List<TokenFilterFactory> tokenFilters = analyzer.getTokenFilterFactories();
            assertEquals(2, tokenFilters.size());
            assertSame(ASCIIFoldingFilterFactory.class, tokenFilters.get(0).getClass());
            assertSame(LowerCaseFilterFactory.class, tokenFilters.get(1).getClass());

            // No gaps or version were configured, so these are the builder's defaults.
            assertEquals(0, analyzer.getPositionIncrementGap("dummy"));
            assertEquals(1, analyzer.getOffsetGap("dummy"));
            assertSame(Version.LATEST, analyzer.getVersion());

            assertAnalyzesTo(analyzer, "foo bar FOO BAR",
                    new String[]{"foo", "bar", "foo", "bar"},
                    new int[]{1, 1, 1, 1});
            // Folded tokens are followed by their original form at the same position (increment 0).
            assertAnalyzesTo(analyzer, "föó bär FÖÖ BAR",
                    new String[]{"foo", "föó", "bar", "bär", "foo", "föö", "bar"},
                    new int[]{1, 0, 1, 0, 1, 0, 1});
        }
    }

    /**
     * Builds an analyzer from SPI names ({@code "whitespace"},
     * {@code "asciifolding"} with {@code preserveOriginal=true},
     * {@code "lowercase"}) and checks that the names resolve to the expected
     * factory classes and that analysis yields the expected tokens.
     */
    public void testWhitespaceWithFolding() throws Exception {
        // try-with-resources closes the analyzer even when an assertion fails.
        try (CustomAnalyzer analyzer = CustomAnalyzer.builder()
                .withTokenizer("whitespace")
                .addTokenFilter("asciifolding", "preserveOriginal", "true")
                .addTokenFilter("lowercase")
                .build()) {
            // The SPI names must have been resolved to these concrete factories.
            assertSame(WhitespaceTokenizerFactory.class, analyzer.getTokenizerFactory().getClass());
            assertEquals(Collections.emptyList(), analyzer.getCharFilterFactories());
            List<TokenFilterFactory> tokenFilters = analyzer.getTokenFilterFactories();
            assertEquals(2, tokenFilters.size());
            assertSame(ASCIIFoldingFilterFactory.class, tokenFilters.get(0).getClass());
            assertSame(LowerCaseFilterFactory.class, tokenFilters.get(1).getClass());

            // No gaps or version were configured, so these are the builder's defaults.
            assertEquals(0, analyzer.getPositionIncrementGap("dummy"));
            assertEquals(1, analyzer.getOffsetGap("dummy"));
            assertSame(Version.LATEST, analyzer.getVersion());

            assertAnalyzesTo(analyzer, "foo bar FOO BAR",
                    new String[]{"foo", "bar", "foo", "bar"},
                    new int[]{1, 1, 1, 1});
            // Folded tokens are followed by their original form at the same position (increment 0).
            assertAnalyzesTo(analyzer, "föó bär FÖÖ BAR",
                    new String[]{"foo", "föó", "bar", "bär", "foo", "föö", "bar"},
                    new int[]{1, 0, 1, 0, 1, 0, 1});
        }
    }

    /**
     * Builds an analyzer from factory classes with an HTML-stripping char
     * filter, the classic tokenizer, ASCII folding and lower-casing, plus an
     * explicit match version, position increment gap and offset gap. Checks the
     * reported configuration and the tokens produced from HTML input.
     */
    public void testFactoryHtmlStripClassicFolding() throws Exception {
        // try-with-resources closes the analyzer even when an assertion fails.
        try (CustomAnalyzer analyzer = CustomAnalyzer.builder()
                .withDefaultMatchVersion(Version.LUCENE_5_0_0)
                .addCharFilter(HTMLStripCharFilterFactory.class)
                .withTokenizer(ClassicTokenizerFactory.class)
                .addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "true")
                .addTokenFilter(LowerCaseFilterFactory.class)
                .withPositionIncrementGap(100)
                .withOffsetGap(1000)
                .build()) {
            assertSame(ClassicTokenizerFactory.class, analyzer.getTokenizerFactory().getClass());
            List<CharFilterFactory> charFilters = analyzer.getCharFilterFactories();
            assertEquals(1, charFilters.size());
            assertEquals(HTMLStripCharFilterFactory.class, charFilters.get(0).getClass());
            List<TokenFilterFactory> tokenFilters = analyzer.getTokenFilterFactories();
            assertEquals(2, tokenFilters.size());
            assertSame(ASCIIFoldingFilterFactory.class, tokenFilters.get(0).getClass());
            assertSame(LowerCaseFilterFactory.class, tokenFilters.get(1).getClass());

            // Explicitly configured values must be reported back unchanged.
            assertEquals(100, analyzer.getPositionIncrementGap("dummy"));
            assertEquals(1000, analyzer.getOffsetGap("dummy"));
            assertSame(Version.LUCENE_5_0_0, analyzer.getVersion());

            // Markup must not show up as tokens.
            assertAnalyzesTo(analyzer, "<p>foo bar</p> FOO BAR",
                    new String[]{"foo", "bar", "foo", "bar"},
                    new int[]{1, 1, 1, 1});
            assertAnalyzesTo(analyzer, "<p><b>föó</b> bär     FÖÖ BAR</p>",
                    new String[]{"foo", "föó", "bar", "bär", "foo", "föö", "bar"},
                    new int[]{1, 0, 1, 0, 1, 0, 1});
        }
    }

    /**
     * Same scenario as the factory-class variant, but every component is given
     * by SPI name ({@code "htmlstrip"}, {@code "classic"},
     * {@code "asciifolding"}, {@code "lowercase"}). Checks that the names
     * resolve to the expected factories and that HTML input is analyzed as
     * expected.
     */
    public void testHtmlStripClassicFolding() throws Exception {
        // try-with-resources closes the analyzer even when an assertion fails.
        try (CustomAnalyzer analyzer = CustomAnalyzer.builder()
                .withDefaultMatchVersion(Version.LUCENE_5_0_0)
                .addCharFilter("htmlstrip")
                .withTokenizer("classic")
                .addTokenFilter("asciifolding", "preserveOriginal", "true")
                .addTokenFilter("lowercase")
                .withPositionIncrementGap(100)
                .withOffsetGap(1000)
                .build()) {
            assertSame(ClassicTokenizerFactory.class, analyzer.getTokenizerFactory().getClass());
            List<CharFilterFactory> charFilters = analyzer.getCharFilterFactories();
            assertEquals(1, charFilters.size());
            assertEquals(HTMLStripCharFilterFactory.class, charFilters.get(0).getClass());
            List<TokenFilterFactory> tokenFilters = analyzer.getTokenFilterFactories();
            assertEquals(2, tokenFilters.size());
            assertSame(ASCIIFoldingFilterFactory.class, tokenFilters.get(0).getClass());
            assertSame(LowerCaseFilterFactory.class, tokenFilters.get(1).getClass());

            // Explicitly configured values must be reported back unchanged.
            assertEquals(100, analyzer.getPositionIncrementGap("dummy"));
            assertEquals(1000, analyzer.getOffsetGap("dummy"));
            assertSame(Version.LUCENE_5_0_0, analyzer.getVersion());

            // Markup must not show up as tokens.
            assertAnalyzesTo(analyzer, "<p>foo bar</p> FOO BAR",
                    new String[]{"foo", "bar", "foo", "bar"},
                    new int[]{1, 1, 1, 1});
            assertAnalyzesTo(analyzer, "<p><b>föó</b> bär     FÖÖ BAR</p>",
                    new String[]{"foo", "föó", "bar", "bär", "foo", "föö", "bar"},
                    new int[]{1, 0, 1, 0, 1, 0, 1});
        }
    }

    /**
     * Configures a stop filter whose word list is given as a classpath resource
     * name and checks that every token of the sample input is removed.
     */
    public void testStopWordsFromClasspath() throws Exception {
        // try-with-resources closes the analyzer even when an assertion fails.
        try (CustomAnalyzer analyzer = CustomAnalyzer.builder()
                .withTokenizer(WhitespaceTokenizerFactory.class)
                .addTokenFilter("stop",
                        "ignoreCase", "true",
                        "words", "org/apache/lucene/analysis/custom/teststop.txt",
                        "format", "wordset")
                .build()) {
            assertSame(WhitespaceTokenizerFactory.class, analyzer.getTokenizerFactory().getClass());
            assertEquals(Collections.emptyList(), analyzer.getCharFilterFactories());
            List<TokenFilterFactory> tokenFilters = analyzer.getTokenFilterFactories();
            assertEquals(1, tokenFilters.size());
            assertSame(StopFilterFactory.class, tokenFilters.get(0).getClass());

            // No gaps or version were configured, so these are the builder's defaults.
            assertEquals(0, analyzer.getPositionIncrementGap("dummy"));
            assertEquals(1, analyzer.getOffsetGap("dummy"));
            assertSame(Version.LATEST, analyzer.getVersion());

            // All three words are expected to be filtered out
            // (presumably teststop.txt lists "foo" and "bar"; ignoreCase covers the capitalized forms).
            assertAnalyzesTo(analyzer, "foo Foo Bar", new String[0]);
        }
    }

    /**
     * Passes the stop filter configuration as a {@link Map} instead of varargs.
     * Checks that the builder empties the map it is given, that analysis works
     * for both the SPI-name and the factory-class overload, and that an
     * unmodifiable map is rejected with {@link UnsupportedOperationException}.
     */
    public void testStopWordsFromClasspathWithMap() throws Exception {
        Map<String, String> configForName = new HashMap<>();
        configForName.put("ignoreCase", "true");
        configForName.put("words", "org/apache/lucene/analysis/custom/teststop.txt");
        configForName.put("format", "wordset");
        // Copies must be taken now: the builder is expected to empty the map it receives.
        Map<String, String> configForClass = new HashMap<>(configForName);
        Map<String, String> readOnlyConfig = Collections.unmodifiableMap(new HashMap<>(configForName));

        // Each analyzer gets its own try-with-resources so that both are closed,
        // including when an assertion fails.
        try (CustomAnalyzer byName = CustomAnalyzer.builder()
                .withTokenizer("whitespace")
                .addTokenFilter("stop", configForName)
                .build()) {
            assertTrue(configForName.isEmpty());
            assertAnalyzesTo(byName, "foo Foo Bar", new String[0]);
        }

        try (CustomAnalyzer byClass = CustomAnalyzer.builder()
                .withTokenizer(WhitespaceTokenizerFactory.class)
                .addTokenFilter(StopFilterFactory.class, configForClass)
                .build()) {
            assertTrue(configForClass.isEmpty());
            assertAnalyzesTo(byClass, "foo Foo Bar", new String[0]);
        }

        // A map that cannot be modified must make the builder chain fail.
        expectThrows(UnsupportedOperationException.class, () -> {
            CustomAnalyzer.builder()
                    .withTokenizer("whitespace")
                    .addTokenFilter("stop", readOnlyConfig)
                    .build();
        });
    }

    /**
     * Creates the builder with the test data directory as its base path and
     * refers to the stop word file by a relative name. Every token of the
     * sample input is expected to be removed.
     */
    public void testStopWordsFromFile() throws Exception {
        // try-with-resources closes the analyzer even when the assertion fails.
        try (CustomAnalyzer analyzer = CustomAnalyzer.builder(getDataPath(""))
                .withTokenizer("whitespace")
                .addTokenFilter("stop",
                        "ignoreCase", "true",
                        "words", "teststop.txt",
                        "format", "wordset")
                .build()) {
            assertAnalyzesTo(analyzer, "foo Foo Bar", new String[0]);
        }
    }

    /**
     * Creates the builder with {@code "."} as its base path and passes the
     * stop word file as the full path returned by {@code getDataPath}. Every
     * token of the sample input is expected to be removed.
     */
    public void testStopWordsFromFileAbsolute() throws Exception {
        final String stopWordsFile = getDataPath("teststop.txt").toString();
        // try-with-resources closes the analyzer even when the assertion fails.
        try (CustomAnalyzer analyzer = CustomAnalyzer.builder(Paths.get("."))
                .withTokenizer("whitespace")
                .addTokenFilter("stop",
                        "ignoreCase", "true",
                        "words", stopWordsFile,
                        "format", "wordset")
                .build()) {
            assertAnalyzesTo(analyzer, "foo Foo Bar", new String[0]);
        }
    }

    /**
     * Expects {@link IllegalStateException} from a builder chain that sets the
     * default match version after a char filter has already been added.
     */
    public void testIncorrectOrder() throws Exception {
        expectThrows(IllegalStateException.class, () ->
                CustomAnalyzer.builder()
                        .addCharFilter("htmlstrip")
                        .withDefaultMatchVersion(Version.LATEST)
                        .withTokenizer("whitespace")
                        .build());
    }

    /**
     * Expects {@link IllegalArgumentException} for an unknown tokenizer name
     * and checks that the message mentions both "SPI" and "does not exist".
     */
    public void testMissingSPI() throws Exception {
        IllegalArgumentException failure = expectThrows(IllegalArgumentException.class, () ->
                CustomAnalyzer.builder()
                        .withTokenizer("foobar_nonexistent")
                        .build());
        final String message = failure.getMessage();
        assertTrue(message.contains("SPI"));
        assertTrue(message.contains("does not exist"));
    }

    /**
     * Expects {@link SetOnce.AlreadySetException} when a tokenizer is
     * configured twice on the same builder.
     */
    public void testSetTokenizerTwice() throws Exception {
        expectThrows(SetOnce.AlreadySetException.class, () ->
                CustomAnalyzer.builder()
                        .withTokenizer("whitespace")
                        .withTokenizer(StandardTokenizerFactory.class)
                        .build());
    }

    /**
     * Expects {@link SetOnce.AlreadySetException} when the default match
     * version is configured twice on the same builder.
     */
    public void testSetMatchVersionTwice() throws Exception {
        expectThrows(SetOnce.AlreadySetException.class, () ->
                CustomAnalyzer.builder()
                        .withDefaultMatchVersion(Version.LATEST)
                        .withDefaultMatchVersion(Version.LATEST)
                        .withTokenizer("standard")
                        .build());
    }

    /**
     * Expects {@link SetOnce.AlreadySetException} when the position increment
     * gap is configured twice on the same builder.
     */
    public void testSetPosIncTwice() throws Exception {
        expectThrows(SetOnce.AlreadySetException.class, () ->
                CustomAnalyzer.builder()
                        .withPositionIncrementGap(2)
                        .withPositionIncrementGap(3)
                        .withTokenizer("standard")
                        .build());
    }

    /**
     * Expects {@link SetOnce.AlreadySetException} when the offset gap is
     * configured twice on the same builder.
     */
    public void testSetOfsGapTwice() throws Exception {
        expectThrows(SetOnce.AlreadySetException.class, () ->
                CustomAnalyzer.builder()
                        .withOffsetGap(2)
                        .withOffsetGap(3)
                        .withTokenizer("standard")
                        .build());
    }

    /**
     * Expects {@link IllegalStateException} when {@code build()} is called on
     * a builder that was given no tokenizer.
     */
    public void testNoTokenizer() throws Exception {
        expectThrows(IllegalStateException.class, () -> CustomAnalyzer.builder().build());
    }

    /**
     * Expects {@link NullPointerException} when the tokenizer name is
     * {@code null}.
     */
    public void testNullTokenizer() throws Exception {
        // The cast selects the String-name overload for the null argument.
        expectThrows(NullPointerException.class, () ->
                CustomAnalyzer.builder()
                        .withTokenizer((String) null)
                        .build());
    }

    /**
     * Expects {@link NullPointerException} when the tokenizer factory class is
     * {@code null}.
     */
    public void testNullTokenizerFactory() throws Exception {
        // The cast selects the factory-class overload for the null argument.
        expectThrows(NullPointerException.class, () ->
                CustomAnalyzer.builder()
                        .withTokenizer((Class) null)
                        .build());
    }

    /**
     * Expects {@link NullPointerException} when a tokenizer parameter pair has
     * a {@code null} key.
     */
    public void testNullParamKey() throws Exception {
        expectThrows(NullPointerException.class, () ->
                CustomAnalyzer.builder()
                        .withTokenizer("whitespace", null, "foo")
                        .build());
    }

    /**
     * Expects {@link NullPointerException} when the default match version is
     * {@code null}.
     */
    public void testNullMatchVersion() throws Exception {
        expectThrows(NullPointerException.class, () ->
                CustomAnalyzer.builder()
                        .withDefaultMatchVersion((Version) null)
                        .withTokenizer("whitespace")
                        .build());
    }

    /**
     * Checks {@code normalize} against two analyzers assembled from the dummy
     * factories declared in this class: one whose components do not implement
     * {@link MultiTermAwareComponent} and one whose components do.
     */
    public void testNormalization() throws IOException {
        // None of these components implement MultiTermAwareComponent; the expected
        // result is the unchanged input. try-with-resources closes the analyzer.
        try (CustomAnalyzer notMultiTermAware = CustomAnalyzer.builder()
                .withTokenizer(DummyTokenizerFactory.class, Collections.emptyMap())
                .addCharFilter(DummyCharFilterFactory.class, Collections.emptyMap())
                .addTokenFilter(DummyTokenFilterFactory.class, Collections.emptyMap())
                .build()) {
            assertEquals(new BytesRef("0À"), notMultiTermAware.normalize("dummy", "0À"));
        }

        // These components expose multi-term variants: the char filter variant maps
        // '0' to '2' and the token filter variant is ASCII folding, hence "2A".
        try (CustomAnalyzer multiTermAware = CustomAnalyzer.builder()
                .withTokenizer(DummyMultiTermAwareTokenizerFactory.class, Collections.emptyMap())
                .addCharFilter(DummyMultiTermAwareCharFilterFactory.class, Collections.emptyMap())
                .addTokenFilter(DummyMultiTermAwareTokenFilterFactory.class, Collections.emptyMap())
                .build()) {
            assertEquals(new BytesRef("2A"), multiTermAware.normalize("dummy", "0À"));
        }
    }

    /**
     * Checks that {@code normalize} applies more than one token filter:
     * with lower-casing followed by ASCII folding, {@code "À B é"} is expected
     * to become {@code "a b e"}.
     */
    public void testNormalizationWithMultipleTokenFilters() throws IOException {
        // try-with-resources closes the analyzer, which the inline expression never did.
        try (CustomAnalyzer analyzer = CustomAnalyzer.builder()
                .withTokenizer(WhitespaceTokenizerFactory.class, Collections.emptyMap())
                .addTokenFilter(LowerCaseFilterFactory.class, Collections.emptyMap())
                .addTokenFilter(ASCIIFoldingFilterFactory.class, Collections.emptyMap())
                .build()) {
            assertEquals(new BytesRef("a b e"), analyzer.normalize("dummy", "À B é"));
        }
    }

    /**
     * Checks that {@code normalize} applies more than one char filter: with two
     * mapping char filters (mapping1.txt, then mapping2.txt), {@code "a b c"}
     * is expected to become {@code "e f c"}.
     */
    public void testNormalizationWithMultiplCharFilters() throws IOException {
        // The singleton maps are copied into HashMaps before being handed to the
        // builder (presumably because the builder needs a modifiable map).
        // try-with-resources closes the analyzer, which the inline expression never did.
        try (CustomAnalyzer analyzer = CustomAnalyzer.builder()
                .withTokenizer(WhitespaceTokenizerFactory.class, Collections.emptyMap())
                .addCharFilter(MappingCharFilterFactory.class,
                        new HashMap<>(Collections.singletonMap("mapping", "org/apache/lucene/analysis/custom/mapping1.txt")))
                .addCharFilter(MappingCharFilterFactory.class,
                        new HashMap<>(Collections.singletonMap("mapping", "org/apache/lucene/analysis/custom/mapping2.txt")))
                .build()) {
            assertEquals(new BytesRef("e f c"), analyzer.normalize("dummy", "a b c"));
        }
    }
}
