package org.apache.solr.handler.clustering.carrot2;

import java.io.IOException;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.carrot2.core.LanguageCode;
import org.carrot2.text.analysis.ExtendedWhitespaceTokenizer;
import org.carrot2.text.analysis.ITokenizer;
import org.carrot2.text.linguistic.ITokenizerFactory;
import org.carrot2.text.util.MutableCharArray;
import org.carrot2.util.ExceptionUtils;
import org.carrot2.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.class */
/**
 * Carrot2 {@link ITokenizerFactory} that picks a tokenizer per language.
 *
 * <p>{@link LanguageCode#CHINESE_SIMPLIFIED} is served by an adapter around Lucene's Smart
 * Chinese analysis classes, which are looked up reflectively by name. Every other language,
 * including {@code ARABIC}, gets an {@link ExtendedWhitespaceTokenizer}.
 */
public class LuceneCarrot2TokenizerFactory implements ITokenizerFactory {
    private static final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    /**
     * Returns a tokenizer suitable for the given language.
     *
     * @param languageCode language of the content to tokenize; any value other than
     *     {@code CHINESE_SIMPLIFIED} (including {@code null}) yields the whitespace tokenizer
     * @return a new tokenizer instance; never {@code null}
     */
    public ITokenizer getTokenizer(LanguageCode languageCode) {
        if (languageCode == LanguageCode.CHINESE_SIMPLIFIED) {
            return ChineseTokenizerFactory.createTokenizer();
        }
        // ARABIC is intentionally not special-cased: it shares the default tokenizer.
        return new ExtendedWhitespaceTokenizer();
    }

    /**
     * Creates tokenizers backed by Lucene's Smart Chinese classes, falling back to
     * {@link ExtendedWhitespaceTokenizer} when they cannot be instantiated.
     */
    private static final class ChineseTokenizerFactory {
        /** Fully qualified name of the Smart Chinese sentence tokenizer, loaded reflectively. */
        private static final String SENTENCE_TOKENIZER_CLASS =
                "org.apache.lucene.analysis.cn.smart.SentenceTokenizer";

        /** Fully qualified name of the Smart Chinese word filter, loaded reflectively. */
        private static final String WORD_TOKEN_FILTER_CLASS =
                "org.apache.lucene.analysis.cn.smart.WordTokenFilter";

        static {
            // Probe for the Smart Chinese classes once, so that a missing JAR is reported in
            // the log instead of silently degrading to whitespace tokenization.
            // NOTE(review): this initializer was reconstructed from incomplete decompiler
            // output; confirm the log message and the Error handling against upstream.
            try {
                ReflectionUtils.classForName(WORD_TOKEN_FILTER_CLASS, false);
                ReflectionUtils.classForName(SENTENCE_TOKENIZER_CLASS, false);
            } catch (Throwable t) {
                logger.warn("Could not load Lucene's Smart Chinese analyzer classes; Chinese content"
                        + " will be tokenized on whitespace only. Make sure the Smart Chinese"
                        + " analyzer JAR is on the classpath.", t);
                if (t instanceof Error) {
                    // Errors (e.g. OutOfMemoryError, linkage problems) are not swallowed.
                    throw (Error) t;
                }
            }
        }

        private ChineseTokenizerFactory() {
        }

        /**
         * Builds a Smart Chinese tokenizer, or a whitespace tokenizer if that fails.
         *
         * @return a new tokenizer; never {@code null}
         * @throws OutOfMemoryError if one is raised while instantiating the tokenizer
         */
        static ITokenizer createTokenizer() {
            try {
                return new ChineseTokenizer();
            } catch (Throwable th) {
                if (th instanceof OutOfMemoryError) {
                    throw (OutOfMemoryError) th;
                }
                // Any other failure (typically the classes being absent) means fallback.
                return new ExtendedWhitespaceTokenizer();
            }
        }

        /**
         * Adapts the Smart Chinese {@code SentenceTokenizer} + {@code WordTokenFilter} chain to
         * Carrot2's {@link ITokenizer}. {@link #reset(Reader)} must be called before
         * {@link #nextToken()}, because the filter and term attribute are only created there.
         */
        private static final class ChineseTokenizer implements ITokenizer {
            private static final Pattern numeric = Pattern.compile("[\\-+'$]?\\d+([:\\-/,.]?\\d+)*[%$]?");

            // Token type codes returned by nextToken(). The numeric values are the ones this
            // class has always returned; presumably they correspond to ITokenizer's TT_*
            // constants - TODO confirm and reference those constants directly.
            private static final short TOKEN_EOF = -1;
            private static final short TOKEN_TERM = 1;
            private static final short TOKEN_NUMERIC = 2;
            private static final short TOKEN_PUNCTUATION = 3;

            private final Tokenizer sentenceTokenizer;
            private final Class<?> tokenFilterClass;
            /** Reusable view over the current term, handed to the numeric matcher. */
            private final MutableCharArray tempCharSequence = new MutableCharArray(new char[0]);

            /** Rebuilt on every {@link #reset(Reader)}; {@code null} until then. */
            private TokenStream wordTokenFilter;
            /** Term attribute of {@link #wordTokenFilter}; {@code null} until the first reset. */
            private CharTermAttribute term;

            /**
             * Instantiates the sentence tokenizer reflectively with a {@code null} reader; the
             * real input is supplied later through {@link #reset(Reader)}.
             *
             * @throws Exception if the Smart Chinese classes cannot be loaded or constructed
             */
            private ChineseTokenizer() throws Exception {
                Class<?> sentenceTokenizerClass = ReflectionUtils.classForName(SENTENCE_TOKENIZER_CLASS, false);
                this.sentenceTokenizer = (Tokenizer) sentenceTokenizerClass
                        .getConstructor(Reader.class)
                        .newInstance((Reader) null);
                this.tokenFilterClass = ReflectionUtils.classForName(WORD_TOKEN_FILTER_CLASS, false);
            }

            /**
             * Advances to the next token and classifies it.
             *
             * @return {@code -1} at end of stream, {@code 3} for a lone {@code ','},
             *     {@code 2} for a token matching the numeric pattern, {@code 1} otherwise
             * @throws IOException if the underlying token stream fails
             */
            public short nextToken() throws IOException {
                if (!wordTokenFilter.incrementToken()) {
                    return TOKEN_EOF;
                }
                final char[] image = term.buffer();
                final int length = term.length();
                tempCharSequence.reset(image, 0, length);
                if (length == 1 && image[0] == ',') {
                    // A single comma is the only token reported as punctuation.
                    return TOKEN_PUNCTUATION;
                }
                if (numeric.matcher(tempCharSequence).matches()) {
                    return TOKEN_NUMERIC;
                }
                return TOKEN_TERM;
            }

            /**
             * Points the supplied array at the current term's buffer (no copy is made here).
             *
             * @param mutableCharArray receiver for the current token's characters
             */
            public void setTermBuffer(MutableCharArray mutableCharArray) {
                mutableCharArray.reset(term.buffer(), 0, term.length());
            }

            /**
             * Starts tokenizing new input: feeds the reader to the sentence tokenizer and wraps
             * it in a freshly constructed word filter.
             *
             * @param reader source of the text to tokenize
             * @throws RuntimeException wrapping any failure to set up the filter chain
             */
            public void reset(Reader reader) {
                try {
                    sentenceTokenizer.setReader(reader);
                    wordTokenFilter = (TokenStream) tokenFilterClass
                            .getConstructor(TokenStream.class)
                            .newInstance(sentenceTokenizer);
                    term = wordTokenFilter.addAttribute(CharTermAttribute.class);
                } catch (Exception e) {
                    throw ExceptionUtils.wrapAsRuntimeException(e);
                }
            }
        }
    }
}
