package org.apache.lucene.analysis.icu.segmentation;

import com.ibm.icu.lang.UCharacter;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.icu.tokenattributes.ScriptAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeFactory;

/* loaded from: input_file:libs/lucene-analyzers-icu-6.6.5-patched.9.jar:org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.class */
/**
 * Tokenizer that splits its input into tokens at the boundaries reported by a
 * {@link CompositeBreakIterator} built from an {@link ICUTokenizerConfig}.
 *
 * <p>Input is pulled from the reader into a fixed {@value #IOBUFFER}-char buffer. When the
 * buffer is filled completely, only the part up to (and including) the last whitespace char
 * is handed to the break iterator; the remainder is carried over to the next refill.
 */
public final class ICUTokenizer extends Tokenizer {
    /** Size, in chars, of the internal read buffer. */
    private static final int IOBUFFER = 4096;

    /**
     * Value returned by the break iterator when it has no further boundary
     * (same value as ICU's {@code BreakIterator.DONE}).
     */
    private static final int DONE = -1;

    private final char[] buffer = new char[IOBUFFER];
    /** Number of valid chars currently held in {@link #buffer}. */
    private int length = 0;
    /** Number of chars at the start of {@link #buffer} handed to the break iterator. */
    private int usableLength = 0;
    /** Accumulated offset of {@code buffer[0]} within the overall input. */
    private int offset = 0;

    private final CompositeBreakIterator breaker;
    private final ICUTokenizerConfig config;
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
    private final ScriptAttribute scriptAtt = addAttribute(ScriptAttribute.class);

    /**
     * Creates a tokenizer using {@code new DefaultICUTokenizerConfig(true, true)} and the
     * default attribute factory.
     */
    public ICUTokenizer() {
        this(new DefaultICUTokenizerConfig(true, true));
    }

    /**
     * Creates a tokenizer using the given configuration and the default attribute factory.
     *
     * @param config configuration used to build the break iterator and to map tokens to types
     */
    public ICUTokenizer(ICUTokenizerConfig config) {
        this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, config);
    }

    /**
     * Creates a tokenizer using the given attribute factory and configuration.
     *
     * @param factory attribute factory passed to the {@link Tokenizer} superclass
     * @param config configuration used to build the break iterator and to map tokens to types
     */
    public ICUTokenizer(AttributeFactory factory, ICUTokenizerConfig config) {
        super(factory);
        this.config = config;
        this.breaker = new CompositeBreakIterator(config);
    }

    /**
     * Advances to the next token, refilling the buffer from the reader as needed.
     *
     * @return {@code true} if a token was produced, {@code false} once the input is exhausted
     * @throws IOException if reading from the underlying reader fails
     */
    @Override
    public boolean incrementToken() throws IOException {
        clearAttributes();
        if (length == 0) {
            refill();
        }
        while (!incrementTokenBuffer()) {
            refill();
            if (length <= 0) {
                // Nothing left over and nothing more could be read.
                return false;
            }
        }
        return true;
    }

    /** Resets the break iterator to empty text and clears all buffer bookkeeping. */
    @Override
    public void reset() throws IOException {
        super.reset();
        breaker.setText(buffer, 0, 0);
        length = 0;
        usableLength = 0;
        offset = 0;
    }

    /** Sets the final offset to the (corrected) position just past the last char buffered. */
    @Override
    public void end() throws IOException {
        super.end();
        final int finalOffset = (length < 0) ? offset : offset + length;
        offsetAtt.setOffset(correctOffset(finalOffset), correctOffset(finalOffset));
    }

    /**
     * Scans the buffered chars backwards for whitespace.
     *
     * @return the index just past the last whitespace char in the buffer, or {@code -1} if the
     *     buffer contains no whitespace
     */
    private int findSafeEnd() {
        for (int i = length - 1; i >= 0; i--) {
            if (UCharacter.isWhitespace(buffer[i])) {
                return i + 1;
            }
        }
        return -1;
    }

    /**
     * Discards the chars already handed to the break iterator, moves the leftover chars to the
     * front of the buffer, reads more input behind them and re-targets the break iterator.
     *
     * @throws IOException if reading from the underlying reader fails
     */
    private void refill() throws IOException {
        offset += usableLength;
        final int leftover = length - usableLength;
        System.arraycopy(buffer, usableLength, buffer, 0, leftover);
        final int requested = buffer.length - leftover;
        final int returned = read(input, buffer, leftover, requested);
        length = returned + leftover;
        if (returned < requested) {
            // The reader hit end-of-stream: everything buffered can be processed.
            usableLength = length;
        } else {
            // The buffer is full, so more input may follow: stop after the last whitespace.
            usableLength = findSafeEnd();
            if (usableLength < 0) {
                // A full buffer without any whitespace: process all of it anyway.
                usableLength = length;
            }
        }
        breaker.setText(buffer, 0, Math.max(0, usableLength));
    }

    /**
     * Reads from {@code input} until {@code length} chars have been stored or the reader
     * reports end-of-stream.
     *
     * @param input reader to pull chars from
     * @param buffer destination array
     * @param offset index in {@code buffer} at which to store the first char
     * @param length maximum number of chars to read; must not be negative
     * @return the number of chars actually read, between 0 and {@code length}
     * @throws IOException if the reader fails
     */
    private static int read(Reader input, char[] buffer, int offset, int length) throws IOException {
        assert length >= 0 : "length must not be negative: " + length;
        int remaining = length;
        while (remaining > 0) {
            final int location = length - remaining;
            final int count = input.read(buffer, offset + location, remaining);
            if (count == -1) {
                break; // end of stream
            }
            remaining -= count;
        }
        return length - remaining;
    }

    /**
     * Produces the next token from the text currently set on the break iterator, skipping
     * segments whose rule status is 0.
     *
     * @return {@code true} if the attributes were populated with a token, {@code false} if the
     *     break iterator has no further boundary in the current text
     */
    private boolean incrementTokenBuffer() {
        int start = breaker.current();
        if (start == DONE) {
            return false;
        }

        // Exhaustion is detected on the END boundary returned by next(): testing the start
        // instead would let a DONE end slip through to copyBuffer() as a negative length.
        int end = breaker.next();
        while (end != DONE && breaker.getRuleStatus() == 0) {
            start = end;
            end = breaker.next();
        }
        if (end == DONE) {
            return false;
        }

        termAtt.copyBuffer(buffer, start, end - start);
        offsetAtt.setOffset(correctOffset(offset + start), correctOffset(offset + end));
        typeAtt.setType(config.getType(breaker.getScriptCode(), breaker.getRuleStatus()));
        scriptAtt.setCode(breaker.getScriptCode());
        return true;
    }
}
