package org.apache.lucene.analysis.minhash;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizerFactory;
import org.apache.lucene.analysis.minhash.MinHashFilter;
import org.apache.lucene.analysis.shingle.ShingleFilterFactory;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
import org.junit.Test;

/* loaded from: input_file:org/apache/lucene/analysis/minhash/MinHashFilterIT.class */
public class MinHashFilterIT extends BaseTokenStreamTestCase {
    // Compiler-generated flag: true when JVM assertions are disabled for this class.
    // It is assigned exactly once, in the static initializer of this class.
    static final /* synthetic */ boolean $assertionsDisabled;

    /* loaded from: input_file:org/apache/lucene/analysis/minhash/MinHashFilterIT$TokenizerChain.class */
    public static class TokenizerChain extends Analyzer {
        private final CharFilterFactory[] charFilters;
        private final TokenizerFactory tokenizer;
        private final TokenFilterFactory[] filters;

        public TokenizerChain(CharFilterFactory[] charFilterFactoryArr, TokenizerFactory tokenizerFactory, TokenFilterFactory[] tokenFilterFactoryArr) {
            this.charFilters = charFilterFactoryArr;
            this.tokenizer = tokenizerFactory;
            this.filters = tokenFilterFactoryArr;
        }

        public Reader initReader(String str, Reader reader) {
            if (this.charFilters != null && this.charFilters.length > 0) {
                Reader reader2 = reader;
                for (CharFilterFactory charFilterFactory : this.charFilters) {
                    reader2 = charFilterFactory.create(reader2);
                }
                reader = reader2;
            }
            return reader;
        }

        protected Analyzer.TokenStreamComponents createComponents(String str) {
            TokenStream create = this.tokenizer.create();
            TokenStream tokenStream = create;
            for (TokenFilterFactory tokenFilterFactory : this.filters) {
                tokenStream = tokenFilterFactory.create(tokenStream);
            }
            return new Analyzer.TokenStreamComponents(create, tokenStream);
        }

        public String toString() {
            StringBuilder sb = new StringBuilder("TokenizerChain(");
            for (CharFilterFactory charFilterFactory : this.charFilters) {
                sb.append(charFilterFactory);
                sb.append(", ");
            }
            sb.append(this.tokenizer);
            for (TokenFilterFactory tokenFilterFactory : this.filters) {
                sb.append(", ");
                sb.append(tokenFilterFactory);
            }
            sb.append(')');
            return sb.toString();
        }
    }

    /**
     * Hashes the first four bytes of {@code MinHashFilter.getBytes(0)} with
     * {@code murmurhash3_x64_128} and compares both halves of the result with fixed values.
     */
    @Test
    public void testIntHash() {
        final byte[] encodedZero = MinHashFilter.getBytes(0);
        final MinHashFilter.LongPair digest = new MinHashFilter.LongPair();

        MinHashFilter.murmurhash3_x64_128(encodedZero, 0, 4, 0, digest);

        assertEquals(-3485513579396041028L, digest.val1);
        assertEquals(6383328099726337777L, digest.val2);
    }

    /**
     * Hashes the UTF-16LE bytes of a five-word string with {@code murmurhash3_x64_128}
     * and compares both halves of the result with fixed values.
     */
    @Test
    public void testStringHash() throws UnsupportedEncodingException {
        final byte[] encoded = "woof woof woof woof woof".getBytes("UTF-16LE");
        final MinHashFilter.LongPair digest = new MinHashFilter.LongPair();

        MinHashFilter.murmurhash3_x64_128(encoded, 0, encoded.length, 0, digest);

        assertEquals(7638079586852243959L, digest.val1);
        assertEquals(4378804943379391304L, digest.val2);
    }

    /**
     * Verifies that a pair with {@code (val1=1, val2=2)} compares greater than a pair with
     * {@code (val1=2, val2=1)}.
     *
     * <p>The comparison is checked with a JUnit assertion so it is enforced even when the JVM
     * runs with Java {@code assert} statements disabled.
     */
    @Test
    public void testSimpleOrder() throws UnsupportedEncodingException {
        MinHashFilter.LongPair first = new MinHashFilter.LongPair();
        first.val1 = 1L;
        first.val2 = 2L;

        MinHashFilter.LongPair second = new MinHashFilter.LongPair();
        second.val1 = 2L;
        second.val2 = 1L;

        assertTrue("expected (val1=1, val2=2) to compare greater than (val1=2, val2=1)", first.compareTo(second) > 0);
    }

    /**
     * Checks {@code isLessThanUnsigned} on boundary values, then fills size-500
     * {@code FixedSizeTreeSet}s with hashes of consecutive integers and checks the set sizes,
     * the number of hashes that were rejected or displaced, and that draining the set from the
     * end yields strictly decreasing values according to {@code isLessThan}.
     */
    @Test
    public void testHashOrder() {
        assertFalse(MinHashFilter.isLessThanUnsigned(0L, 0L));
        assertTrue(MinHashFilter.isLessThanUnsigned(0L, -1L));
        assertTrue(MinHashFilter.isLessThanUnsigned(1L, -1L));
        assertTrue(MinHashFilter.isLessThanUnsigned(-2L, -1L));
        assertTrue(MinHashFilter.isLessThanUnsigned(1L, 2L));
        assertTrue(MinHashFilter.isLessThanUnsigned(Long.MAX_VALUE, Long.MIN_VALUE));

        // Fewer hashes than the capacity: everything is kept, nothing is dropped.
        MinHashFilter.FixedSizeTreeSet smallSet = new MinHashFilter.FixedSizeTreeSet(500);
        HashSet<MinHashFilter.LongPair> smallDropped = new HashSet<>();
        for (int value = 0; value < 100; value++) {
            MinHashFilter.LongPair hash = new MinHashFilter.LongPair();
            MinHashFilter.murmurhash3_x64_128(MinHashFilter.getBytes(value), 0, 4, 0, hash);
            MinHashFilter.LongPair lastBefore = null;
            if (smallSet.size() > 0) {
                lastBefore = (MinHashFilter.LongPair) smallSet.last();
            }
            if (smallSet.add(hash)) {
                // The add succeeded; if the set is full and its last element changed, record the old last.
                if (lastBefore != null && smallSet.size() == 500 && !lastBefore.equals(smallSet.last())) {
                    smallDropped.add(lastBefore);
                }
            } else {
                smallDropped.add(hash);
            }
        }
        assertEquals(100L, smallSet.size());
        assertEquals(0L, smallDropped.size());

        // Far more hashes than the capacity: only 500 remain, the rest are recorded as dropped.
        MinHashFilter.FixedSizeTreeSet largeSet = new MinHashFilter.FixedSizeTreeSet(500);
        HashSet<MinHashFilter.LongPair> allHashes = new HashSet<>();
        HashSet<MinHashFilter.LongPair> largeDropped = new HashSet<>();
        for (int value = 0; value < 1000000; value++) {
            MinHashFilter.LongPair hash = new MinHashFilter.LongPair();
            MinHashFilter.murmurhash3_x64_128(MinHashFilter.getBytes(value), 0, 4, 0, hash);
            allHashes.add(hash);
            MinHashFilter.LongPair lastBefore = null;
            if (largeSet.size() > 0) {
                lastBefore = (MinHashFilter.LongPair) largeSet.last();
            }
            if (largeSet.add(hash)) {
                if (lastBefore != null && largeSet.size() == 500 && !lastBefore.equals(largeSet.last())) {
                    largeDropped.add(lastBefore);
                }
            } else {
                largeDropped.add(hash);
            }
        }
        assertEquals(1000000L, allHashes.size());
        assertEquals(500L, largeSet.size());
        assertEquals(999500L, largeDropped.size());

        // Drain from the end; each polled element must be less than the one polled before it.
        MinHashFilter.LongPair previous = null;
        MinHashFilter.LongPair current;
        while ((current = (MinHashFilter.LongPair) largeSet.pollLast()) != null) {
            if (previous != null) {
                assertTrue(isLessThan(current, previous));
            }
            previous = current;
        }
    }

    /**
     * Adds the hashes of 10000 consecutive integers to a size-500 {@code FixedSizeTreeSet},
     * checks that exactly 500 remain, and checks that draining the set from the end yields
     * strictly decreasing values according to {@code isLessThan} (so no value is repeated).
     */
    @Test
    public void testHashNotRepeated() {
        MinHashFilter.FixedSizeTreeSet minSet = new MinHashFilter.FixedSizeTreeSet(500);
        for (int value = 0; value < 10000; value++) {
            MinHashFilter.LongPair hash = new MinHashFilter.LongPair();
            MinHashFilter.murmurhash3_x64_128(MinHashFilter.getBytes(value), 0, 4, 0, hash);
            // Whether the hash was kept is irrelevant here; only the final contents are checked.
            minSet.add(hash);
        }
        assertEquals(500L, minSet.size());

        MinHashFilter.LongPair previous = null;
        MinHashFilter.LongPair current;
        while ((current = (MinHashFilter.LongPair) minSet.pollLast()) != null) {
            if (previous != null) {
                assertTrue(isLessThan(current, previous));
            }
            previous = current;
        }
    }

    /** Checks that the mock shingle tokenizer splits ten words into two five-word tokens. */
    @Test
    public void testMockShingleTokenizer() throws IOException {
        final String input = "woof woof woof woof woof woof woof woof woof puff";
        final String[] expectedShingles = {"woof woof woof woof woof", "woof woof woof woof puff"};

        Tokenizer shingles = createMockShingleTokenizer(5, input);
        assertTokenStreamContents(shingles, expectedShingles);
    }

    /**
     * Checks the min-hash tokens produced for a single five-word shingle, first with one hash
     * function and then with two.
     *
     * <p>Expected terms are written as explicit UTF-16 code units. The hash terms are arbitrary
     * 16-bit values that do not survive re-encoding or Unicode normalization of the source file
     * when written as literal characters (for example, 64133 is U+FA85, a compatibility
     * ideograph that normalization rewrites to a different code point).
     */
    @Test
    public void testTokenStreamSingleInput() throws IOException {
        String singleHash = new String(new char[]{8449, 54077, 64133, 32857, 8605, 41409, 54897, 26479});
        assertTokenStreamContents(createTokenStream(5, "woof woof woof woof woof", 1, 1, 100, false), new String[]{singleHash}, new int[]{0}, new int[]{24}, new String[]{"MIN_HASH"}, new int[]{1}, new int[]{1}, 24, 0, null, true);

        String firstOfTwo = new String(new char[]{0, 0, 8449, 54077, 64133, 32857, 8605, 41409});
        String secondOfTwo = new String(new char[]{0, 1, 16887, 58164, 39536, 14926, 6529, 17276});
        assertTokenStreamContents(createTokenStream(5, "woof woof woof woof woof", 2, 1, 1, false), new String[]{firstOfTwo, secondOfTwo}, new int[]{0, 0}, new int[]{24, 24}, new String[]{"MIN_HASH", "MIN_HASH"}, new int[]{1, 0}, new int[]{1, 1}, 24, 0, null, true);
    }

    /**
     * Checks the two min-hash tokens produced for a ten-word input with one hash function,
     * one bucket and a hash set size of 100.
     *
     * <p>Expected terms are written as explicit UTF-16 code units because the hash terms are
     * arbitrary 16-bit values (56843 is an unpaired surrogate, 64133 is a compatibility
     * ideograph) that are not safe to store as literal characters in a source file.
     */
    @Test
    public void testTokenStream1() throws IOException {
        String[] expectedTerms = {
            new String(new char[]{8449, 54077, 64133, 32857, 8605, 41409, 54897, 26479}),
            new String(new char[]{36347, 63457, 43013, 56843, 52284, 34231, 57934, 42302})
        };
        assertTokenStreamContents(createTokenStream(5, "woof woof woof woof woof woof woof woof woof puff", 1, 1, 100, false), expectedTerms, new int[]{0, 0}, new int[]{49, 49}, new String[]{"MIN_HASH", "MIN_HASH"}, new int[]{1, 0}, new int[]{1, 1}, 49, 0, null, true);
    }

    /**
     * Consumes {@code tokenStream} completely and returns the text of every token, in order.
     *
     * <p>The stream is reset, drained, ended and closed by this method; it is closed even if
     * consuming it throws.
     *
     * @param tokenStream the stream to drain; it is closed when this method returns
     * @return the term text of each token produced by the stream
     * @throws IOException if the stream fails while being consumed
     */
    private ArrayList<String> getTokens(TokenStream tokenStream) throws IOException {
        ArrayList<String> terms = new ArrayList<>();
        try (TokenStream stream = tokenStream) {
            // The attribute instance is shared across tokens, so it only needs to be looked up once.
            CharTermAttribute termAttribute = stream.getAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                terms.add(new String(termAttribute.buffer(), 0, termAttribute.length()));
            }
            stream.end();
        }
        return terms;
    }

    /**
     * Analyzes {@code str2} as the content of field {@code str} and returns the text of every
     * token the analyzer produces, in order.
     *
     * @param analyzer the analyzer to run; it is not closed by this method
     * @param str the field name passed to the analyzer
     * @param str2 the text to analyze
     * @return the term text of each produced token
     * @throws IOException if the token stream fails while being consumed
     */
    private ArrayList<String> getTokens(Analyzer analyzer, String str, String str2) throws IOException {
        // Draining and closing the stream is handled by the TokenStream overload.
        return getTokens(analyzer.tokenStream(str, str2));
    }

    /** With 100 hash functions, one bucket and a hash set size of one, 100 tokens are expected. */
    @Test
    public void testTokenStream2() throws IOException {
        final String input = "woof woof woof woof woof woof woof woof woof puff";
        TokenStream stream = createTokenStream(5, input, 100, 1, 1, false);
        ArrayList<String> terms = getTokens(stream);
        stream.close();

        assertEquals(100L, terms.size());
    }

    /** With 10 hash functions, one bucket and a hash set size of 10, 20 tokens are expected. */
    @Test
    public void testTokenStream3() throws IOException {
        final String input = "woof woof woof woof woof woof woof woof woof puff";
        TokenStream stream = createTokenStream(5, input, 10, 1, 10, false);
        ArrayList<String> terms = getTokens(stream);
        stream.close();

        assertEquals(20L, terms.size());
    }

    /**
     * With 10 hash functions, 10 buckets and a hash set size of one, 20 tokens are expected
     * without rotation and 100 tokens with rotation.
     */
    @Test
    public void testTokenStream4() throws IOException {
        final String input = "woof woof woof woof woof woof woof woof woof puff";

        TokenStream plainStream = createTokenStream(5, input, 10, 10, 1, false);
        ArrayList<String> plainTerms = getTokens(plainStream);
        plainStream.close();
        assertEquals(20L, plainTerms.size());

        TokenStream rotatedStream = createTokenStream(5, input, 10, 10, 1, true);
        ArrayList<String> rotatedTerms = getTokens(rotatedStream);
        rotatedStream.close();
        assertEquals(100L, rotatedTerms.size());
    }

    /**
     * Indexes three short documents with the min-hash analyzer and queries for the two known
     * hash terms: all three documents must match, and the second and third hits must score
     * exactly half of the top hit.
     *
     * <p>The analyzer, directory, writer and reader are closed when the test ends, including
     * when an assertion fails.
     */
    @Test
    public void testLSHQuery() throws IOException {
        String[] texts = {
            "woof woof woof woof woof",
            "woof woof woof woof woof puff",
            "woof woof woof woof puff"
        };
        // Hash terms as explicit UTF-16 code units; these values are not safe as literal characters.
        String firstHash = new String(new char[]{8449, 54077, 64133, 32857, 8605, 41409, 54897, 26479});
        String secondHash = new String(new char[]{36347, 63457, 43013, 56843, 52284, 34231, 57934, 42302});

        try (Analyzer analyzer = createMinHashAnalyzer(5, 1, 100); RAMDirectory directory = new RAMDirectory()) {
            // The writer is closed before the reader is opened so the reader sees the committed index.
            try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
                for (String text : texts) {
                    Document document = new Document();
                    document.add(new TextField("text", text, Field.Store.NO));
                    writer.addDocument(document);
                }
                writer.commit();
            }

            try (DirectoryReader reader = DirectoryReader.open(directory)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                BooleanQuery.Builder builder = new BooleanQuery.Builder();
                builder.add(new ConstantScoreQuery(new TermQuery(new Term("text", firstHash))), BooleanClause.Occur.SHOULD);
                builder.add(new ConstantScoreQuery(new TermQuery(new Term("text", secondHash))), BooleanClause.Occur.SHOULD);
                builder.setDisableCoord(true);

                TopDocs hits = searcher.search(builder.build(), 10);
                assertEquals(3L, hits.totalHits);
                float topScore = hits.scoreDocs[0].score;
                assertEquals(topScore / 2.0f, hits.scoreDocs[1].score, 0.0f);
                assertEquals(topScore / 2.0f, hits.scoreDocs[2].score, 0.0f);
            }
        }
    }

    /**
     * Indexes every run of at least five consecutive words taken from "one" .. "ten" and checks
     * hit counts and scores for several min-hash queries.
     *
     * <p>Single five-word queries must score 1.0 on every hit. The query containing all ten
     * words must return 21 hits whose scores, in rank order, are 6, 5, 5, 4, 4, 4, 3, 3, 3, 3,
     * 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1 (within 0.001).
     *
     * <p>The analyzer, directory, writer and reader are closed when the test ends, including
     * when an assertion fails.
     */
    @Test
    public void testLSHQuery2() throws IOException {
        String[] words = {"one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten"};
        final int minWordsPerDocument = 5;

        try (Analyzer analyzer = createMinHashAnalyzer(5, 1, 100); RAMDirectory directory = new RAMDirectory()) {
            // The writer is closed before the reader is opened so the reader sees the committed index.
            try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
                for (int start = 0; start < words.length; start++) {
                    StringBuilder text = new StringBuilder();
                    for (int offset = 0; offset < words.length - start; offset++) {
                        if (text.length() > 0) {
                            text.append(" ");
                        }
                        text.append(words[start + offset]);
                        // Index the growing prefix once it holds at least five words.
                        if (offset >= minWordsPerDocument - 1) {
                            Document document = new Document();
                            document.add(new TextField("text", text.toString(), Field.Store.NO));
                            writer.addDocument(document);
                        }
                    }
                }
                writer.commit();
            }

            try (DirectoryReader reader = DirectoryReader.open(directory)) {
                IndexSearcher searcher = new IndexSearcher(reader);

                String[] fiveWordQueries = {
                    "one two three four five",
                    "two three four five six",
                    "three four five six seven",
                    "four five six seven eight",
                    "five six seven eight nine",
                    "six seven eight nine ten"
                };
                long[] expectedHitCounts = {6L, 10L, 12L, 12L, 10L, 6L};
                for (int q = 0; q < fiveWordQueries.length; q++) {
                    TopDocs hits = searcher.search(buildQuery("text", fiveWordQueries[q], 5, 1, 100), 100);
                    assertEquals(fiveWordQueries[q], expectedHitCounts[q], hits.totalHits);
                    assertAllScores(hits, 1.0f);
                }

                assertEquals(11L, searcher.search(buildQuery("text", "one two three four five six", 5, 1, 100), 100).totalHits);

                TopDocs allWordsHits = searcher.search(buildQuery("text", "one two three four five six seven eight nine ten", 5, 1, 100), 100);
                assertEquals(21L, allWordsHits.totalHits);
                int[] expectedScores = {6, 5, 5, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1};
                for (int rank = 0; rank < expectedScores.length; rank++) {
                    assertEquals("score at rank " + rank, (float) expectedScores[rank], allWordsHits.scoreDocs[rank].score, 0.001f);
                }
            }
        }
    }

    /**
     * Asserts that every hit returned in {@code topDocs.scoreDocs} has exactly the score {@code f}.
     *
     * <p>The loop is bounded by the returned array rather than {@code totalHits}, so it cannot
     * index past the end when more documents matched than were returned.
     *
     * @param topDocs search result whose returned hits are checked
     * @param f the score every returned hit must have
     */
    private void assertAllScores(TopDocs topDocs, float f) {
        for (int i = 0; i < topDocs.scoreDocs.length; i++) {
            assertEquals("score of hit " + i, f, topDocs.scoreDocs[i].score, 0.0f);
        }
    }

    /**
     * Builds a disjunction of constant-score term queries, one per min-hash token of {@code str2}.
     *
     * @param str the field to analyze and to query
     * @param str2 the text whose min-hash tokens become the query terms
     * @param i first argument forwarded to {@code createMinHashAnalyzer} (used there as the shingle size)
     * @param i2 second argument forwarded to {@code createMinHashAnalyzer} (used there as "hashCount")
     * @param i3 third argument forwarded to {@code createMinHashAnalyzer} (used there as "hashSetSize")
     * @return a boolean query with one SHOULD clause per token and coord disabled
     * @throws IOException if analysis of {@code str2} fails
     */
    private Query buildQuery(String str, String str2, int i, int i2, int i3) throws IOException {
        ArrayList<String> hashTerms;
        // The analyzer is only needed to produce the terms; close it even if analysis throws.
        try (TokenizerChain analyzer = createMinHashAnalyzer(i, i2, i3)) {
            hashTerms = getTokens(analyzer, str, str2);
        }

        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        for (String hashTerm : hashTerms) {
            // Query the same field that was analyzed instead of a hard-coded field name.
            builder.add(new ConstantScoreQuery(new TermQuery(new Term(str, hashTerm))), BooleanClause.Occur.SHOULD);
        }
        builder.setDisableCoord(true);
        return builder.build();
    }

    /**
     * Creates a min-hash token stream over the mock shingle tokenizer for {@code str}.
     *
     * <p>Factory arguments are passed as strings, since the factory receives a string-to-string map.
     *
     * @param i forwarded to {@code createMockShingleTokenizer} together with {@code str}
     * @param str the text to tokenize
     * @param i2 value of the "hashCount" factory argument
     * @param i3 value of the "bucketCount" factory argument
     * @param i4 value of the "hashSetSize" factory argument
     * @param z value of the "withRotation" factory argument
     * @return the stream produced by {@code MinHashFilterFactory} on top of the tokenizer
     */
    public static TokenStream createTokenStream(int i, String str, int i2, int i3, int i4, boolean z) {
        Tokenizer shingles = createMockShingleTokenizer(i, str);
        HashMap<String, String> factoryArgs = new HashMap<>();
        factoryArgs.put("hashCount", Integer.toString(i2));
        factoryArgs.put("bucketCount", Integer.toString(i3));
        factoryArgs.put("hashSetSize", Integer.toString(i4));
        factoryArgs.put("withRotation", Boolean.toString(z));
        return new MinHashFilterFactory(factoryArgs).create(shingles);
    }

    /**
     * Creates an analyzer that tokenizes on whitespace, builds fixed-size shingles and then
     * applies the min-hash filter.
     *
     * <p>Factory arguments are passed as strings, since the factories receive string-to-string maps.
     *
     * @param i value used for both "minShingleSize" and "maxShingleSize"
     * @param i2 value of the min-hash "hashCount" argument
     * @param i3 value of the min-hash "hashSetSize" argument
     * @return a chain with no char filters, a whitespace tokenizer, and the shingle and min-hash filters
     */
    public static TokenizerChain createMinHashAnalyzer(int i, int i2, int i3) {
        WhitespaceTokenizerFactory whitespaceTokenizerFactory = new WhitespaceTokenizerFactory(Collections.emptyMap());

        HashMap<String, String> shingleArgs = new HashMap<>();
        shingleArgs.put("minShingleSize", Integer.toString(i));
        shingleArgs.put("maxShingleSize", Integer.toString(i));
        shingleArgs.put("outputUnigrams", "false");
        shingleArgs.put("outputUnigramsIfNoShingles", "false");
        shingleArgs.put("tokenSeparator", " ");
        TokenFilterFactory shingleFilterFactory = new ShingleFilterFactory(shingleArgs);

        HashMap<String, String> minHashArgs = new HashMap<>();
        minHashArgs.put("hashCount", Integer.toString(i2));
        minHashArgs.put("hashSetSize", Integer.toString(i3));
        TokenFilterFactory minHashFilterFactory = new MinHashFilterFactory(minHashArgs);

        return new TokenizerChain(new CharFilterFactory[0], whitespaceTokenizerFactory, new TokenFilterFactory[]{shingleFilterFactory, minHashFilterFactory});
    }

    /**
     * Creates a {@code MockTokenizer} whose tokens are runs of {@code i} whitespace-separated words.
     *
     * <p>For {@code i == 5} the pattern is exactly
     * {@code [^ \t\r\n]+([ \t\r\n]+[^ \t\r\n]+){4}}.
     *
     * @param i number of words per token; must be at least 1
     * @param str the text to tokenize, or {@code null} to leave the reader unset
     * @return the tokenizer, with checks enabled
     * @throws IllegalArgumentException if {@code i} is less than 1
     */
    public static Tokenizer createMockShingleTokenizer(int i, String str) {
        if (i < 1) {
            throw new IllegalArgumentException("shingle size must be at least 1, got " + i);
        }
        final String word = "[^ \t\r\n]+";
        final String gap = "[ \t\r\n]+";
        // One leading word followed by (i - 1) repetitions of "gap word".
        String pattern = i > 1 ? word + "(" + gap + word + "){" + (i - 1) + "}" : word;

        MockTokenizer mockTokenizer = new MockTokenizer(new CharacterRunAutomaton(new RegExp(pattern).toAutomaton()), true);
        mockTokenizer.setEnableChecks(true);
        if (str != null) {
            mockTokenizer.setReader(new StringReader(str));
        }
        return mockTokenizer;
    }

    /**
     * Orders two pairs by {@code val2} first and by {@code val1} on a {@code val2} tie, using
     * {@code MinHashFilter.isLessThanUnsigned} for both comparisons.
     *
     * @return {@code true} if {@code longPair} sorts strictly before {@code longPair2}
     */
    private boolean isLessThan(MinHashFilter.LongPair longPair, MinHashFilter.LongPair longPair2) {
        return MinHashFilter.isLessThanUnsigned(longPair.val2, longPair2.val2)
                || (longPair.val2 == longPair2.val2
                        && MinHashFilter.isLessThanUnsigned(longPair.val1, longPair2.val1));
    }

    // Runs once at class initialization: stores the negation of this class's desired assertion
    // status, so the flag is true exactly when assertions are disabled.
    static {
        $assertionsDisabled = !MinHashFilterIT.class.desiredAssertionStatus();
    }
}
