package org.apache.lucene.analysis.morfologik;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import morfologik.stemming.Dictionary;
import morfologik.stemming.DictionaryLookup;
import morfologik.stemming.IStemmer;
import morfologik.stemming.WordData;
import morfologik.stemming.polish.PolishStemmer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.CharsRefBuilder;

/* loaded from: input_file:libs/lucene-analyzers-morfologik-6.6.5-patched.19.jar:org/apache/lucene/analysis/morfologik/MorfologikFilter.class */
/**
 * Token filter that rewrites each incoming term to the stems an {@link IStemmer} returns for it.
 *
 * <p>The first stem replaces the term in place; every further stem of the same surface form is
 * emitted as an extra token with a position increment of {@code 0}. The tag of each stem is split
 * on {@code +} / {@code |} and published through {@link MorphosyntacticTagsAttribute}. Tokens
 * marked as keywords, and tokens for which neither the term nor its lowercased form yields a
 * lookup result, are passed on with their term untouched and their tags cleared.
 */
public class MorfologikFilter extends TokenFilter {
    /** Splits a stem's tag string on {@code +} or {@code |}. */
    private static final Pattern LEMMA_SPLITTER = Pattern.compile("\\+|\\|");

    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final MorphosyntacticTagsAttribute tagsAtt = addAttribute(MorphosyntacticTagsAttribute.class);
    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
    private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);

    /** Reusable buffer for the lowercased copy of the current term. */
    private final CharsRefBuilder scratch = new CharsRefBuilder();

    /** Attribute state captured right after a successful lookup; restored before each extra stem. */
    private AttributeSource.State current;

    /** The wrapped stream (the same instance that is handed to the superclass constructor). */
    private final TokenStream input;

    private final IStemmer stemmer;

    /** Lookup results for the current surface form; empty when nothing is pending. */
    private List<WordData> lemmaList;

    /** Pool of builders reused across tokens to hold the individual tag parts. */
    private final List<StringBuilder> tagsList = new ArrayList<>();

    /** Index of the next entry of {@link #lemmaList} to emit. */
    private int lemmaListIndex;

    /**
     * Creates a filter backed by the dictionary of a freshly constructed {@link PolishStemmer}.
     *
     * @param tokenStream the stream to filter
     */
    public MorfologikFilter(TokenStream tokenStream) {
        this(tokenStream, new PolishStemmer().getDictionary());
    }

    /**
     * Creates a filter that looks terms up in the given dictionary.
     *
     * @param tokenStream the stream to filter
     * @param dictionary  the dictionary wrapped in a {@link DictionaryLookup}
     */
    public MorfologikFilter(TokenStream tokenStream, Dictionary dictionary) {
        super(tokenStream);
        this.input = tokenStream;
        this.stemmer = new DictionaryLookup(dictionary);
        this.lemmaList = Collections.emptyList();
    }

    /**
     * Consumes the next pending lookup result: writes its stem into the term attribute and its
     * tag parts (or an empty list when the tag is {@code null}) into the tags attribute.
     */
    private void popNextLemma() {
        WordData lemma = lemmaList.get(lemmaListIndex++);
        termAtt.setEmpty().append(lemma.getStem());

        CharSequence tag = lemma.getTag();
        if (tag != null) {
            String[] parts = LEMMA_SPLITTER.split(tag.toString());
            for (int slot = 0; slot < parts.length; slot++) {
                StringBuilder buffer;
                if (slot < tagsList.size()) {
                    // Recycle a builder left over from an earlier token.
                    buffer = tagsList.get(slot);
                    buffer.setLength(0);
                } else {
                    buffer = new StringBuilder();
                    tagsList.add(buffer);
                }
                buffer.append(parts[slot]);
            }
            // Only the leading builders belong to this stem; the pool may hold more.
            tagsAtt.setTags(tagsList.subList(0, parts.length));
        } else {
            tagsAtt.setTags(Collections.emptyList());
        }
    }

    /**
     * Looks the given form up in the stemmer and rewinds the pending-result cursor.
     *
     * @param charSequence the surface form to look up
     * @return {@code true} if the lookup produced at least one result
     */
    private boolean lookupSurfaceForm(CharSequence charSequence) {
        lemmaList = stemmer.lookup(charSequence);
        lemmaListIndex = 0;
        return !lemmaList.isEmpty();
    }

    /**
     * Emits the next pending stem of the current surface form if there is one; otherwise pulls the
     * next token from the input and tries to stem it.
     *
     * @return {@code false} once the input is exhausted, {@code true} otherwise
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public final boolean incrementToken() throws IOException {
        if (lemmaListIndex < lemmaList.size()) {
            // More stems for the previous token: replay its attributes and stack on the same position.
            restoreState(current);
            posIncrAtt.setPositionIncrement(0);
            popNextLemma();
            return true;
        }

        if (!input.incrementToken()) {
            return false;
        }

        // Keywords are never looked up; otherwise try the exact form first, then its lowercase.
        boolean hasLemmas = !keywordAttr.isKeyword()
                && (lookupSurfaceForm(termAtt) || lookupSurfaceForm(toLowercase(termAtt)));
        if (hasLemmas) {
            // Snapshot before the term is overwritten so later stems can start from the same state.
            current = captureState();
            popNextLemma();
        } else {
            tagsAtt.clear();
        }
        return true;
    }

    /**
     * Copies the given sequence into {@link #scratch}, lowercasing it code point by code point.
     *
     * @param charSequence the text to lowercase
     * @return the value of {@code scratch.get()}; since the builder is shared, presumably only
     *     valid until the next call (confirm against {@link CharsRefBuilder})
     */
    private CharSequence toLowercase(CharSequence charSequence) {
        final int length = charSequence.length();
        scratch.setLength(length);
        scratch.grow(length);

        final char[] buffer = scratch.chars();
        for (int pos = 0; pos < length; ) {
            int lowered = Character.toLowerCase(Character.codePointAt(charSequence, pos));
            // toChars reports how many chars it wrote, which is also how far to advance.
            pos += Character.toChars(lowered, buffer, pos);
        }
        return scratch.get();
    }

    /**
     * Drops any pending lookup results and pooled tag builders, then resets the superclass.
     *
     * @throws IOException if the superclass reset fails
     */
    @Override
    public void reset() throws IOException {
        tagsList.clear();
        lemmaList = Collections.emptyList();
        lemmaListIndex = 0;
        super.reset();
    }
}
