package org.apache.lucene.analysis.compound;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
import org.xml.sax.InputSource;

/* loaded from: input_file:org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.class */
public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {

    /* loaded from: input_file:org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter$MockRetainAttribute.class */
    /**
     * Custom boolean token attribute used by this test class. It is set by
     * {@code MockRetainAttributeFilter} and read back in {@code testRetainMockAttribute}
     * to verify that the flag is still {@code true} on every token the compound
     * filter emits.
     */
    public interface MockRetainAttribute extends Attribute {
        /** Stores the flag value for the current token. */
        void setRetain(boolean z);

        /** Returns the flag value currently stored for the token. */
        boolean getRetain();
    }

    /* loaded from: input_file:org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter$MockRetainAttributeFilter.class */
    /**
     * Test-only filter that sets the {@link MockRetainAttribute} flag to {@code true}
     * on every token produced by the wrapped stream, leaving the token otherwise untouched.
     */
    private static class MockRetainAttributeFilter extends TokenFilter {
        MockRetainAttribute retainAtt;

        /**
         * Wraps {@code tokenStream} and registers the {@link MockRetainAttribute} on it.
         *
         * @param tokenStream the stream whose tokens get flagged
         */
        MockRetainAttributeFilter(TokenStream tokenStream) {
            super(tokenStream);
            MockRetainAttribute registered = (MockRetainAttribute) addAttribute(MockRetainAttribute.class);
            this.retainAtt = registered;
        }

        /**
         * Advances the wrapped stream and, if it produced a token, flags that token.
         *
         * @return {@code true} if a token is available, {@code false} once the input is exhausted
         * @throws IOException if the wrapped stream fails
         */
        public boolean incrementToken() throws IOException {
            boolean hasToken = input.incrementToken();
            if (hasToken) {
                retainAtt.setRetain(true);
            }
            return hasToken;
        }
    }

    /* loaded from: input_file:org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter$MockRetainAttributeImpl.class */
    /**
     * Backing implementation of {@link MockRetainAttribute}: a single boolean flag
     * that starts out {@code false} and is reset to {@code false} by {@link #clear()}.
     */
    public static final class MockRetainAttributeImpl extends AttributeImpl implements MockRetainAttribute {
        // Java initialises boolean fields to false, which is the cleared state.
        private boolean retain;

        @Override
        public void setRetain(boolean z) {
            retain = z;
        }

        @Override
        public boolean getRetain() {
            return retain;
        }

        /** Resets the flag to {@code false}. */
        public void clear() {
            retain = false;
        }

        /**
         * Copies the flag into {@code attributeImpl}.
         *
         * @param attributeImpl target attribute; it is cast to {@link MockRetainAttribute}
         */
        public void copyTo(AttributeImpl attributeImpl) {
            MockRetainAttribute target = (MockRetainAttribute) attributeImpl;
            target.setRetain(retain);
        }

        /** Reports the flag to the reflector under the key {@code "retain"}. */
        public void reflectWith(AttributeReflector attributeReflector) {
            attributeReflector.reflect(MockRetainAttribute.class, "retain", retain);
        }
    }

    /**
     * Builds the sub-word dictionary handed to the compound filters under test.
     *
     * @param strArr dictionary entries
     * @return a {@link CharArraySet} over the given entries, created with the case-ignoring
     *     flag set (the tests rely on this: e.g. entry "Dörr" matches "dörr" inside "Bildörr")
     */
    private static CharArraySet makeDictionary(String... strArr) {
        final boolean ignoreCase = true;
        return new CharArraySet(Arrays.asList(strArr), ignoreCase);
    }

    /**
     * Feeds a sentence through {@link HyphenationCompoundWordTokenFilter} using the
     * {@code da_UTF8.xml} hyphenation grammar and the dictionary {"læse", "hest"}.
     * Expects the compound "læsehest" to be followed by its two parts, each emitted
     * with a position increment of 0.
     */
    public void testHyphenationCompoundWordsDA() throws Exception {
        TokenStream source = whitespaceMockTokenizer("min veninde som er lidt af en læsehest");

        // The grammar file is resolved relative to this test class.
        InputSource grammar = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
        HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(grammar);
        CharArraySet dict = makeDictionary("læse", "hest");

        // 5 / 2 / 15: size limits passed to the filter; false: do not restrict to the longest match.
        HyphenationCompoundWordTokenFilter filter =
                new HyphenationCompoundWordTokenFilter(source, hyphenator, dict, 5, 2, 15, false);

        String[] expectedTerms = {"min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest"};
        int[] expectedPosIncrements = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0};
        assertTokenStreamContents(filter, expectedTerms, expectedPosIncrements);
    }

    /**
     * Decompounds "basketballkurv" with the {@code da_UTF8.xml} grammar and the final
     * boolean constructor flag set to {@code true}. Although "basket" is in the dictionary,
     * only "basketball", "ball" and "kurv" are expected after the original token.
     */
    public void testHyphenationCompoundWordsDELongestMatch() throws Exception {
        TokenStream source = whitespaceMockTokenizer("basketballkurv");

        InputSource grammar = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
        HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(grammar);
        CharArraySet dict = makeDictionary("basketball", "basket", "ball", "kurv");

        // Upper size limit raised to 40 for this test; true selects longest-match behaviour.
        HyphenationCompoundWordTokenFilter filter =
                new HyphenationCompoundWordTokenFilter(source, hyphenator, dict, 5, 2, 40, true);

        String[] expectedTerms = {"basketballkurv", "basketball", "ball", "kurv"};
        int[] expectedPosIncrements = {1, 0, 0, 0};
        assertTokenStreamContents(filter, expectedTerms, expectedPosIncrements);
    }

    /**
     * Exercises {@link HyphenationCompoundWordTokenFilter} without a dictionary: the same
     * input word is split three times with different lower/upper sub-word size limits, and
     * the emitted parts are compared against the expected list for each setting.
     */
    public void testHyphenationOnly() throws Exception {
        InputSource grammar = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
        HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(grammar);

        // Sub-word limits 2..4.
        TokenStream limits2to4 =
                new HyphenationCompoundWordTokenFilter(whitespaceMockTokenizer("basketballkurv"), hyphenator, 5, 2, 4);
        String[] expected2to4 = {"basketballkurv", "ba", "sket", "bal", "ball", "kurv"};
        assertTokenStreamContents(limits2to4, expected2to4);

        // Sub-word limits 4..6.
        TokenStream limits4to6 =
                new HyphenationCompoundWordTokenFilter(whitespaceMockTokenizer("basketballkurv"), hyphenator, 5, 4, 6);
        String[] expected4to6 = {"basketballkurv", "basket", "sket", "ball", "lkurv", "kurv"};
        assertTokenStreamContents(limits4to6, expected4to6);

        // Sub-word limits 4..10.
        TokenStream limits4to10 =
                new HyphenationCompoundWordTokenFilter(whitespaceMockTokenizer("basketballkurv"), hyphenator, 5, 4, 10);
        String[] expected4to10 = {
            "basketballkurv", "basket", "basketbal", "basketball", "sket", "sketbal",
            "sketball", "ball", "ballkurv", "lkurv", "kurv"
        };
        assertTokenStreamContents(limits4to10, expected4to10);
    }

    /**
     * Runs a list of compound words through {@link DictionaryCompoundWordTokenFilter}
     * with its default settings and checks, for every emitted token, the term text, the start
     * and end offsets, and the position increment. Sub-word tokens are expected to carry the
     * offsets of the compound they came from and a position increment of 0.
     */
    public void testDumbCompoundWordsSE() throws Exception {
        TokenStream source = whitespaceMockTokenizer("Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba");
        CharArraySet dict = makeDictionary(
                "Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar", "Pelar", "Glas", "Ögon",
                "Fodral", "Bas", "Fiol", "Makare", "Gesäll", "Sko", "Vind", "Rute", "Torkare", "Blad");
        DictionaryCompoundWordTokenFilter filter = new DictionaryCompoundWordTokenFilter(source, dict);

        String[] expectedTerms = {
            "Bildörr", "Bil", "dörr",
            "Bilmotor", "Bil", "motor",
            "Biltak", "Bil", "tak",
            "Slagborr", "Slag", "borr",
            "Hammarborr", "Hammar", "borr",
            "Pelarborr", "Pelar", "borr",
            "Glasögonfodral", "Glas", "ögon", "fodral",
            "Basfiolsfodral", "Bas", "fiol", "fodral",
            "Basfiolsfodralmakaregesäll", "Bas", "fiol", "fodral", "makare", "gesäll",
            "Skomakare", "Sko", "makare",
            "Vindrutetorkare", "Vind", "rute", "torkare",
            "Vindrutetorkarblad", "Vind", "rute", "blad",
            "abba"
        };
        int[] expectedStartOffsets = {
            0, 0, 0,
            8, 8, 8,
            17, 17, 17,
            24, 24, 24,
            33, 33, 33,
            44, 44, 44,
            54, 54, 54, 54,
            69, 69, 69, 69,
            84, 84, 84, 84, 84, 84,
            111, 111, 111,
            121, 121, 121, 121,
            137, 137, 137, 137,
            156
        };
        int[] expectedEndOffsets = {
            7, 7, 7,
            16, 16, 16,
            23, 23, 23,
            32, 32, 32,
            43, 43, 43,
            53, 53, 53,
            68, 68, 68, 68,
            83, 83, 83, 83,
            110, 110, 110, 110, 110, 110,
            120, 120, 120,
            136, 136, 136, 136,
            155, 155, 155, 155,
            160
        };
        int[] expectedPosIncrements = {
            1, 0, 0,
            1, 0, 0,
            1, 0, 0,
            1, 0, 0,
            1, 0, 0,
            1, 0, 0,
            1, 0, 0, 0,
            1, 0, 0, 0,
            1, 0, 0, 0, 0, 0,
            1, 0, 0,
            1, 0, 0, 0,
            1, 0, 0, 0,
            1
        };
        assertTokenStreamContents(filter, expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedPosIncrements);
    }

    /**
     * Decompounds a single long word with {@link DictionaryCompoundWordTokenFilter} and the
     * final boolean constructor flag set to {@code true}. The dictionary contains both
     * "Fiols" and "Fiolsfodral"; the expected output contains "fiolsfodral" but not "fiols".
     */
    public void testDumbCompoundWordsSELongestMatch() throws Exception {
        TokenStream source = whitespaceMockTokenizer("Basfiolsfodralmakaregesäll");
        CharArraySet dict = makeDictionary(
                "Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar", "Pelar", "Glas", "Ögon",
                "Fodral", "Bas", "Fiols", "Makare", "Gesäll", "Sko", "Vind", "Rute", "Torkare", "Blad",
                "Fiolsfodral");
        DictionaryCompoundWordTokenFilter filter =
                new DictionaryCompoundWordTokenFilter(source, dict, 5, 2, 15, true);

        String[] expectedTerms = {"Basfiolsfodralmakaregesäll", "Bas", "fiolsfodral", "fodral", "makare", "gesäll"};
        // Every token, including the sub-words, carries the offsets of the whole 26-char input.
        int[] expectedStartOffsets = {0, 0, 0, 0, 0, 0};
        int[] expectedEndOffsets = {26, 26, 26, 26, 26, 26};
        int[] expectedPosIncrements = {1, 0, 0, 0, 0, 0};
        assertTokenStreamContents(filter, expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedPosIncrements);
    }

    /**
     * Checks that a dictionary entry of exactly two characters is still found when it is the
     * last component of the token: "abcdef" must yield "ab", "cd" and the trailing "ef".
     */
    public void testTokenEndingWithWordComponentOfMinimumLength() throws Exception {
        CharArraySet dict = makeDictionary("ab", "cd", "ef");

        MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        tokenizer.setReader(new StringReader("abcdef"));

        // The middle int (2) is the lower sub-word size limit this test sits right on.
        DictionaryCompoundWordTokenFilter filter =
                new DictionaryCompoundWordTokenFilter(tokenizer, dict, 5, 2, 15, false);

        String[] expectedTerms = {"abcdef", "ab", "cd", "ef"};
        int[] expectedStartOffsets = {0, 0, 0, 0};
        int[] expectedEndOffsets = {6, 6, 6, 6};
        int[] expectedPosIncrements = {1, 0, 0, 0};
        assertTokenStreamContents(filter, expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedPosIncrements);
    }

    /**
     * Checks that a dictionary entry shorter than the lower sub-word size limit is ignored:
     * with a limit of 2, the one-character entry "d" must not appear in the output for
     * "abcdefg", while "abc" and "efg" do.
     */
    public void testWordComponentWithLessThanMinimumLength() throws Exception {
        CharArraySet dict = makeDictionary("abc", "d", "efg");

        MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        tokenizer.setReader(new StringReader("abcdefg"));

        DictionaryCompoundWordTokenFilter filter =
                new DictionaryCompoundWordTokenFilter(tokenizer, dict, 5, 2, 15, false);

        String[] expectedTerms = {"abcdefg", "abc", "efg"};
        int[] expectedStartOffsets = {0, 0, 0};
        int[] expectedEndOffsets = {7, 7, 7};
        int[] expectedPosIncrements = {1, 0, 0};
        assertTokenStreamContents(filter, expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedPosIncrements);
    }

    /**
     * Verifies that the filter can be reused after being abandoned mid-token: it reads the
     * compound and its first sub-word, ends and closes the stream while further sub-words
     * would still be pending, then feeds the same input again and expects the first token to
     * be the full compound rather than a leftover sub-word.
     */
    public void testReset() throws Exception {
        final String compound = "Rindfleischüberwachungsgesetz";
        CharArraySet dict = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz", "Aufgabe", "Überwachung");

        MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        // Checks are switched off because this test ends/closes the stream before exhausting it.
        tokenizer.setEnableChecks(false);
        tokenizer.setReader(new StringReader(compound));

        DictionaryCompoundWordTokenFilter filter =
                new DictionaryCompoundWordTokenFilter(tokenizer, dict, 5, 2, 15, false);
        CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);

        // First pass: consume only the compound and its first part.
        filter.reset();
        assertTrue(filter.incrementToken());
        assertEquals(compound, termAtt.toString());
        assertTrue(filter.incrementToken());
        assertEquals("Rind", termAtt.toString());
        filter.end();
        filter.close();

        // Second pass on fresh input: no stale sub-word may leak through.
        tokenizer.setReader(new StringReader(compound));
        filter.reset();
        assertTrue(filter.incrementToken());
        assertEquals(compound, termAtt.toString());
    }

    /**
     * Verifies that a custom attribute set upstream survives decompounding: every token
     * emitted by {@link DictionaryCompoundWordTokenFilter} (the original token as well as
     * the generated sub-words) must still have {@link MockRetainAttribute} set to
     * {@code true}.
     *
     * <p>The stream is consumed with the full reset / incrementToken / end / close sequence,
     * and the test fails if no token at all is produced, so the per-token assertion cannot
     * pass vacuously.
     */
    public void testRetainMockAttribute() throws Exception {
        CharArraySet dict = makeDictionary("abc", "d", "efg");
        MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        tokenizer.setReader(new StringReader("abcdefg"));

        // MockRetainAttributeFilter flags each token before it reaches the compound filter.
        DictionaryCompoundWordTokenFilter stream =
                new DictionaryCompoundWordTokenFilter(new MockRetainAttributeFilter(tokenizer), dict, 5, 2, 15, false);
        MockRetainAttribute retainAtt = (MockRetainAttribute) stream.addAttribute(MockRetainAttribute.class);

        int tokenCount = 0;
        stream.reset();
        while (stream.incrementToken()) {
            assertTrue("Custom attribute value was lost", retainAtt.getRetain());
            tokenCount++;
        }
        // Complete the consumer workflow instead of abandoning the exhausted stream.
        stream.end();
        stream.close();

        assertTrue("Expected the filter to emit at least one token", tokenCount > 0);
    }

    /**
     * Checks offsets when a char filter changes the text length before tokenization:
     * "ü" is mapped to "ue", so the 12-character input "banküberfall" becomes the
     * 13-character term "bankueberfall". Both the term and its sub-word "fall" are expected
     * to report offsets 0..12, i.e. offsets into the original input.
     *
     * <p>The analyzer is closed in a {@code finally} block so it is released even when the
     * assertion fails.
     */
    public void testInvalidOffsets() throws Exception {
        final CharArraySet dict = makeDictionary("fall");
        NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
        builder.add("ü", "ue");
        final NormalizeCharMap normMap = builder.build();

        Analyzer analyzer = new Analyzer() {
            protected Analyzer.TokenStreamComponents createComponents(String str) {
                MockTokenizer mockTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                return new Analyzer.TokenStreamComponents(mockTokenizer, new DictionaryCompoundWordTokenFilter(mockTokenizer, dict));
            }

            // Applies the "ü" -> "ue" mapping to the raw input before it is tokenized.
            protected Reader initReader(String str, Reader reader) {
                return new MappingCharFilter(normMap, reader);
            }
        };
        try {
            assertAnalyzesTo(analyzer, "banküberfall", new String[]{"bankueberfall", "fall"}, new int[]{0, 0}, new int[]{12, 12});
        } finally {
            analyzer.close();
        }
    }

    /**
     * Randomized smoke test: pushes random text through an analyzer built on
     * {@link DictionaryCompoundWordTokenFilter}, then through one built on
     * {@link HyphenationCompoundWordTokenFilter} with the {@code da_UTF8.xml} grammar,
     * using {@code checkRandomData} with {@code 1000 * RANDOM_MULTIPLIER} iterations each.
     *
     * <p>Each analyzer is closed in a {@code finally} block so it is released even when
     * the random check fails.
     */
    public void testRandomStrings() throws Exception {
        final CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
        Analyzer dictionaryAnalyzer = new Analyzer() {
            protected Analyzer.TokenStreamComponents createComponents(String str) {
                MockTokenizer mockTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                return new Analyzer.TokenStreamComponents(mockTokenizer, new DictionaryCompoundWordTokenFilter(mockTokenizer, dict));
            }
        };
        try {
            checkRandomData(random(), dictionaryAnalyzer, 1000 * RANDOM_MULTIPLIER);
        } finally {
            dictionaryAnalyzer.close();
        }

        final HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm()));
        Analyzer hyphenationAnalyzer = new Analyzer() {
            protected Analyzer.TokenStreamComponents createComponents(String str) {
                MockTokenizer mockTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                return new Analyzer.TokenStreamComponents(mockTokenizer, new HyphenationCompoundWordTokenFilter(mockTokenizer, hyphenator));
            }
        };
        try {
            checkRandomData(random(), hyphenationAnalyzer, 1000 * RANDOM_MULTIPLIER);
        } finally {
            hyphenationAnalyzer.close();
        }
    }

    /**
     * Checks that both compound filters pass an empty term through unchanged: a
     * {@link KeywordTokenizer} turns the empty input into a single empty token, and
     * {@code checkOneTerm} expects exactly that empty term back, first for
     * {@link DictionaryCompoundWordTokenFilter} and then for
     * {@link HyphenationCompoundWordTokenFilter}.
     *
     * <p>Each analyzer is closed in a {@code finally} block so it is released even when
     * the check fails.
     */
    public void testEmptyTerm() throws Exception {
        final CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
        Analyzer dictionaryAnalyzer = new Analyzer() {
            protected Analyzer.TokenStreamComponents createComponents(String str) {
                KeywordTokenizer keywordTokenizer = new KeywordTokenizer();
                return new Analyzer.TokenStreamComponents(keywordTokenizer, new DictionaryCompoundWordTokenFilter(keywordTokenizer, dict));
            }
        };
        try {
            checkOneTerm(dictionaryAnalyzer, "", "");
        } finally {
            dictionaryAnalyzer.close();
        }

        final HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm()));
        Analyzer hyphenationAnalyzer = new Analyzer() {
            protected Analyzer.TokenStreamComponents createComponents(String str) {
                KeywordTokenizer keywordTokenizer = new KeywordTokenizer();
                return new Analyzer.TokenStreamComponents(keywordTokenizer, new HyphenationCompoundWordTokenFilter(keywordTokenizer, hyphenator));
            }
        };
        try {
            checkOneTerm(hyphenationAnalyzer, "", "");
        } finally {
            hyphenationAnalyzer.close();
        }
    }
}
