package org.apache.lucene.analysis.shingle;

import java.io.IOException;
import java.io.StringReader;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;

/* loaded from: input_file:org/apache/lucene/analysis/shingle/ShingleFilterTest.class */
public class ShingleFilterTest extends BaseTokenStreamTestCase {
    // Input for the *WithHoles tests; (re)assigned in setUp() rather than initialised here.
    public static Token[] testTokenWithHoles;
    // Six-token input "please divide this sentence into shingles" shared by most tests in this class.
    public static final Token[] TEST_TOKEN = {createToken("please", 0, 6), createToken("divide", 7, 13), createToken("this", 14, 18), createToken("sentence", 19, 27), createToken("into", 28, 32), createToken("shingles", 33, 39)};
    // Expected position increments / types when TEST_TOKEN comes back as unigrams only
    // (used by testOutputUnigramsIfNoShinglesWithMultipleInputTokens).
    public static final int[] UNIGRAM_ONLY_POSITION_INCREMENTS = {1, 1, 1, 1, 1, 1};
    public static final String[] UNIGRAM_ONLY_TYPES = {"word", "word", "word", "word", "word", "word"};
    // Expected output for TEST_TOKEN with shingle size 2 and unigrams enabled:
    // each unigram ("word") is followed by the bigram ("shingle") starting at it, with increment 0.
    public static final Token[] BI_GRAM_TOKENS = {createToken("please", 0, 6), createToken("please divide", 0, 13), createToken("divide", 7, 13), createToken("divide this", 7, 18), createToken("this", 14, 18), createToken("this sentence", 14, 27), createToken("sentence", 19, 27), createToken("sentence into", 19, 32), createToken("into", 28, 32), createToken("into shingles", 28, 39), createToken("shingles", 33, 39)};
    public static final int[] BI_GRAM_POSITION_INCREMENTS = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
    public static final String[] BI_GRAM_TYPES = {"word", "shingle", "word", "shingle", "word", "shingle", "word", "shingle", "word", "shingle", "word"};
    // Expected output for testTokenWithHoles with shingle size 2 and unigrams enabled
    // (testBiGramFilterWithHoles); "_" appears in the shingle text where the input has a gap.
    public static final Token[] BI_GRAM_TOKENS_WITH_HOLES = {createToken("please", 0, 6), createToken("please divide", 0, 13), createToken("divide", 7, 13), createToken("divide _", 7, 19), createToken("_ sentence", 19, 27), createToken("sentence", 19, 27), createToken("sentence _", 19, 33), createToken("_ shingles", 33, 39), createToken("shingles", 33, 39)};
    public static final int[] BI_GRAM_POSITION_INCREMENTS_WITH_HOLES = {1, 0, 1, 0, 1, 1, 0, 1, 1};
    private static final String[] BI_GRAM_TYPES_WITH_HOLES = {"word", "shingle", "word", "shingle", "shingle", "word", "shingle", "shingle", "word"};
    // Expected output for TEST_TOKEN with shingle size 2 and unigrams disabled: bigrams only.
    // BI_GRAM_TYPES_WITHOUT_UNIGRAMS is also reused by testBiGramFilterWithHolesWithoutUnigrams.
    public static final Token[] BI_GRAM_TOKENS_WITHOUT_UNIGRAMS = {createToken("please divide", 0, 13), createToken("divide this", 7, 18), createToken("this sentence", 14, 27), createToken("sentence into", 19, 32), createToken("into shingles", 28, 39)};
    public static final int[] BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS = {1, 1, 1, 1, 1};
    public static final String[] BI_GRAM_TYPES_WITHOUT_UNIGRAMS = {"shingle", "shingle", "shingle", "shingle", "shingle"};
    // Expected output for testTokenWithHoles with shingle size 2 and unigrams disabled
    // (testBiGramFilterWithHolesWithoutUnigrams). The increments array is kept parallel to the
    // token array: one entry per expected shingle (five in total).
    public static final Token[] BI_GRAM_TOKENS_WITH_HOLES_WITHOUT_UNIGRAMS = {createToken("please divide", 0, 13), createToken("divide _", 7, 19), createToken("_ sentence", 19, 27), createToken("sentence _", 19, 33), createToken("_ shingles", 33, 39)};
    public static final int[] BI_GRAM_POSITION_INCREMENTS_WITH_HOLES_WITHOUT_UNIGRAMS = {1, 1, 1, 1, 1};
    // Single-token input and the matching expected output (the lone unigram).
    public static final Token[] TEST_SINGLE_TOKEN = {createToken("please", 0, 6)};
    public static final Token[] SINGLE_TOKEN = {createToken("please", 0, 6)};
    public static final int[] SINGLE_TOKEN_INCREMENTS = {1};
    public static final String[] SINGLE_TOKEN_TYPES = {"word"};
    // Empty input / empty expectations, used where no token should be produced.
    public static final Token[] EMPTY_TOKEN_ARRAY = new Token[0];
    public static final int[] EMPTY_TOKEN_INCREMENTS_ARRAY = new int[0];
    public static final String[] EMPTY_TOKEN_TYPES_ARRAY = new String[0];
    // Expected output for TEST_TOKEN with max shingle size 3 and unigrams enabled (testTriGramFilter).
    public static final Token[] TRI_GRAM_TOKENS = {createToken("please", 0, 6), createToken("please divide", 0, 13), createToken("please divide this", 0, 18), createToken("divide", 7, 13), createToken("divide this", 7, 18), createToken("divide this sentence", 7, 27), createToken("this", 14, 18), createToken("this sentence", 14, 27), createToken("this sentence into", 14, 32), createToken("sentence", 19, 27), createToken("sentence into", 19, 32), createToken("sentence into shingles", 19, 39), createToken("into", 28, 32), createToken("into shingles", 28, 39), createToken("shingles", 33, 39)};
    public static final int[] TRI_GRAM_POSITION_INCREMENTS = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1};
    public static final String[] TRI_GRAM_TYPES = {"word", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "word"};
    // Expected output for TEST_TOKEN with max shingle size 3 and unigrams disabled
    // (testTriGramFilterWithoutUnigrams).
    public static final Token[] TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS = {createToken("please divide", 0, 13), createToken("please divide this", 0, 18), createToken("divide this", 7, 18), createToken("divide this sentence", 7, 27), createToken("this sentence", 14, 27), createToken("this sentence into", 14, 32), createToken("sentence into", 19, 32), createToken("sentence into shingles", 19, 39), createToken("into shingles", 28, 39)};
    public static final int[] TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS = {1, 0, 1, 0, 1, 0, 1, 0, 1};
    public static final String[] TRI_GRAM_TYPES_WITHOUT_UNIGRAMS = {"shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle"};
    // Expected output for TEST_TOKEN with max shingle size 4 and unigrams enabled (testFourGramFilter).
    public static final Token[] FOUR_GRAM_TOKENS = {createToken("please", 0, 6), createToken("please divide", 0, 13), createToken("please divide this", 0, 18), createToken("please divide this sentence", 0, 27), createToken("divide", 7, 13), createToken("divide this", 7, 18), createToken("divide this sentence", 7, 27), createToken("divide this sentence into", 7, 32), createToken("this", 14, 18), createToken("this sentence", 14, 27), createToken("this sentence into", 14, 32), createToken("this sentence into shingles", 14, 39), createToken("sentence", 19, 27), createToken("sentence into", 19, 32), createToken("sentence into shingles", 19, 39), createToken("into", 28, 32), createToken("into shingles", 28, 39), createToken("shingles", 33, 39)};
    public static final int[] FOUR_GRAM_POSITION_INCREMENTS = {1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1};
    public static final String[] FOUR_GRAM_TYPES = {"word", "shingle", "shingle", "shingle", "word", "shingle", "shingle", "shingle", "word", "shingle", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "word"};
    // Expected output for TEST_TOKEN with max shingle size 4 and unigrams disabled
    // (testFourGramFilterWithoutUnigrams).
    public static final Token[] FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS = {createToken("please divide", 0, 13), createToken("please divide this", 0, 18), createToken("please divide this sentence", 0, 27), createToken("divide this", 7, 18), createToken("divide this sentence", 7, 27), createToken("divide this sentence into", 7, 32), createToken("this sentence", 14, 27), createToken("this sentence into", 14, 32), createToken("this sentence into shingles", 14, 39), createToken("sentence into", 19, 32), createToken("sentence into shingles", 19, 39), createToken("into shingles", 28, 39)};
    public static final int[] FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1};
    public static final String[] FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS = {"shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle"};
    // Expected output for TEST_TOKEN with shingle sizes (3, 3) and unigrams enabled
    // (testTriGramFilterMinTriGram): unigrams plus trigrams, no bigrams.
    public static final Token[] TRI_GRAM_TOKENS_MIN_TRI_GRAM = {createToken("please", 0, 6), createToken("please divide this", 0, 18), createToken("divide", 7, 13), createToken("divide this sentence", 7, 27), createToken("this", 14, 18), createToken("this sentence into", 14, 32), createToken("sentence", 19, 27), createToken("sentence into shingles", 19, 39), createToken("into", 28, 32), createToken("shingles", 33, 39)};
    public static final int[] TRI_GRAM_POSITION_INCREMENTS_MIN_TRI_GRAM = {1, 0, 1, 0, 1, 0, 1, 0, 1, 1};
    public static final String[] TRI_GRAM_TYPES_MIN_TRI_GRAM = {"word", "shingle", "word", "shingle", "word", "shingle", "word", "shingle", "word", "word"};
    // Expected output for TEST_TOKEN with shingle sizes (3, 3) and unigrams disabled
    // (testTriGramFilterWithoutUnigramsMinTriGram): trigrams only.
    public static final Token[] TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM = {createToken("please divide this", 0, 18), createToken("divide this sentence", 7, 27), createToken("this sentence into", 14, 32), createToken("sentence into shingles", 19, 39)};
    public static final int[] TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM = {1, 1, 1, 1};
    public static final String[] TRI_GRAM_TYPES_WITHOUT_UNIGRAMS_MIN_TRI_GRAM = {"shingle", "shingle", "shingle", "shingle"};
    // Expected output for TEST_TOKEN with shingle sizes (3, 4) and unigrams enabled
    // (testFourGramFilterMinTriGram): unigrams plus tri- and four-grams.
    public static final Token[] FOUR_GRAM_TOKENS_MIN_TRI_GRAM = {createToken("please", 0, 6), createToken("please divide this", 0, 18), createToken("please divide this sentence", 0, 27), createToken("divide", 7, 13), createToken("divide this sentence", 7, 27), createToken("divide this sentence into", 7, 32), createToken("this", 14, 18), createToken("this sentence into", 14, 32), createToken("this sentence into shingles", 14, 39), createToken("sentence", 19, 27), createToken("sentence into shingles", 19, 39), createToken("into", 28, 32), createToken("shingles", 33, 39)};
    public static final int[] FOUR_GRAM_POSITION_INCREMENTS_MIN_TRI_GRAM = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1};
    public static final String[] FOUR_GRAM_TYPES_MIN_TRI_GRAM = {"word", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "word", "word"};
    // Expected output for TEST_TOKEN with shingle sizes (3, 4) and unigrams disabled
    // (testFourGramFilterWithoutUnigramsMinTriGram).
    public static final Token[] FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM = {createToken("please divide this", 0, 18), createToken("please divide this sentence", 0, 27), createToken("divide this sentence", 7, 27), createToken("divide this sentence into", 7, 32), createToken("this sentence into", 14, 32), createToken("this sentence into shingles", 14, 39), createToken("sentence into shingles", 19, 39)};
    public static final int[] FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM = {1, 0, 1, 0, 1, 0, 1};
    public static final String[] FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS_MIN_TRI_GRAM = {"shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle"};
    // Expected output for TEST_TOKEN with shingle sizes (4, 4) and unigrams enabled
    // (testFourGramFilterMinFourGram): unigrams plus four-grams only.
    public static final Token[] FOUR_GRAM_TOKENS_MIN_FOUR_GRAM = {createToken("please", 0, 6), createToken("please divide this sentence", 0, 27), createToken("divide", 7, 13), createToken("divide this sentence into", 7, 32), createToken("this", 14, 18), createToken("this sentence into shingles", 14, 39), createToken("sentence", 19, 27), createToken("into", 28, 32), createToken("shingles", 33, 39)};
    public static final int[] FOUR_GRAM_POSITION_INCREMENTS_MIN_FOUR_GRAM = {1, 0, 1, 0, 1, 0, 1, 1, 1};
    public static final String[] FOUR_GRAM_TYPES_MIN_FOUR_GRAM = {"word", "shingle", "word", "shingle", "word", "shingle", "word", "word", "word"};
    // Expected output for TEST_TOKEN with shingle sizes (4, 4) and unigrams disabled
    // (testFourGramFilterWithoutUnigramsMinFourGram): four-grams only.
    public static final Token[] FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS_MIN_FOUR_GRAM = {createToken("please divide this sentence", 0, 27), createToken("divide this sentence into", 7, 32), createToken("this sentence into shingles", 14, 39)};
    public static final int[] FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_MIN_FOUR_GRAM = {1, 1, 1};
    public static final String[] FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS_MIN_FOUR_GRAM = {"shingle", "shingle", "shingle"};
    // Expected output for testBiGramFilterNoSeparator (called with "" as the String argument):
    // the words of each bigram are concatenated with nothing in between.
    public static final Token[] BI_GRAM_TOKENS_NO_SEPARATOR = {createToken("please", 0, 6), createToken("pleasedivide", 0, 13), createToken("divide", 7, 13), createToken("dividethis", 7, 18), createToken("this", 14, 18), createToken("thissentence", 14, 27), createToken("sentence", 19, 27), createToken("sentenceinto", 19, 32), createToken("into", 28, 32), createToken("intoshingles", 28, 39), createToken("shingles", 33, 39)};
    public static final int[] BI_GRAM_POSITION_INCREMENTS_NO_SEPARATOR = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
    public static final String[] BI_GRAM_TYPES_NO_SEPARATOR = {"word", "shingle", "word", "shingle", "word", "shingle", "word", "shingle", "word", "shingle", "word"};
    // Expected output for testBiGramFilterWithoutUnigramsNoSeparator: concatenated bigrams only.
    public static final Token[] BI_GRAM_TOKENS_WITHOUT_UNIGRAMS_NO_SEPARATOR = {createToken("pleasedivide", 0, 13), createToken("dividethis", 7, 18), createToken("thissentence", 14, 27), createToken("sentenceinto", 19, 32), createToken("intoshingles", 28, 39)};
    public static final int[] BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_NO_SEPARATOR = {1, 1, 1, 1, 1};
    public static final String[] BI_GRAM_TYPES_WITHOUT_UNIGRAMS_NO_SEPARATOR = {"shingle", "shingle", "shingle", "shingle", "shingle"};
    // Expected output for testTriGramFilterNoSeparator: unigrams plus concatenated bi- and trigrams.
    public static final Token[] TRI_GRAM_TOKENS_NO_SEPARATOR = {createToken("please", 0, 6), createToken("pleasedivide", 0, 13), createToken("pleasedividethis", 0, 18), createToken("divide", 7, 13), createToken("dividethis", 7, 18), createToken("dividethissentence", 7, 27), createToken("this", 14, 18), createToken("thissentence", 14, 27), createToken("thissentenceinto", 14, 32), createToken("sentence", 19, 27), createToken("sentenceinto", 19, 32), createToken("sentenceintoshingles", 19, 39), createToken("into", 28, 32), createToken("intoshingles", 28, 39), createToken("shingles", 33, 39)};
    public static final int[] TRI_GRAM_POSITION_INCREMENTS_NO_SEPARATOR = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1};
    public static final String[] TRI_GRAM_TYPES_NO_SEPARATOR = {"word", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "word"};
    // Expected output for testTriGramFilterWithoutUnigramsNoSeparator: concatenated bi- and trigrams only.
    public static final Token[] TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS_NO_SEPARATOR = {createToken("pleasedivide", 0, 13), createToken("pleasedividethis", 0, 18), createToken("dividethis", 7, 18), createToken("dividethissentence", 7, 27), createToken("thissentence", 14, 27), createToken("thissentenceinto", 14, 32), createToken("sentenceinto", 19, 32), createToken("sentenceintoshingles", 19, 39), createToken("intoshingles", 28, 39)};
    public static final int[] TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_NO_SEPARATOR = {1, 0, 1, 0, 1, 0, 1, 0, 1};
    public static final String[] TRI_GRAM_TYPES_WITHOUT_UNIGRAMS_NO_SEPARATOR = {"shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle"};
    // Expected output for testBiGramFilterAltSeparator (called with "<SEP>" as the String argument):
    // the words of each bigram are joined by "<SEP>".
    public static final Token[] BI_GRAM_TOKENS_ALT_SEPARATOR = {createToken("please", 0, 6), createToken("please<SEP>divide", 0, 13), createToken("divide", 7, 13), createToken("divide<SEP>this", 7, 18), createToken("this", 14, 18), createToken("this<SEP>sentence", 14, 27), createToken("sentence", 19, 27), createToken("sentence<SEP>into", 19, 32), createToken("into", 28, 32), createToken("into<SEP>shingles", 28, 39), createToken("shingles", 33, 39)};
    public static final int[] BI_GRAM_POSITION_INCREMENTS_ALT_SEPARATOR = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
    public static final String[] BI_GRAM_TYPES_ALT_SEPARATOR = {"word", "shingle", "word", "shingle", "word", "shingle", "word", "shingle", "word", "shingle", "word"};
    // Expected output for testBiGramFilterWithoutUnigramsAltSeparator: "<SEP>"-joined bigrams only.
    public static final Token[] BI_GRAM_TOKENS_WITHOUT_UNIGRAMS_ALT_SEPARATOR = {createToken("please<SEP>divide", 0, 13), createToken("divide<SEP>this", 7, 18), createToken("this<SEP>sentence", 14, 27), createToken("sentence<SEP>into", 19, 32), createToken("into<SEP>shingles", 28, 39)};
    public static final int[] BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_ALT_SEPARATOR = {1, 1, 1, 1, 1};
    public static final String[] BI_GRAM_TYPES_WITHOUT_UNIGRAMS_ALT_SEPARATOR = {"shingle", "shingle", "shingle", "shingle", "shingle"};
    // Expected output for testTriGramFilterAltSeparator: unigrams plus "<SEP>"-joined bi- and trigrams.
    public static final Token[] TRI_GRAM_TOKENS_ALT_SEPARATOR = {createToken("please", 0, 6), createToken("please<SEP>divide", 0, 13), createToken("please<SEP>divide<SEP>this", 0, 18), createToken("divide", 7, 13), createToken("divide<SEP>this", 7, 18), createToken("divide<SEP>this<SEP>sentence", 7, 27), createToken("this", 14, 18), createToken("this<SEP>sentence", 14, 27), createToken("this<SEP>sentence<SEP>into", 14, 32), createToken("sentence", 19, 27), createToken("sentence<SEP>into", 19, 32), createToken("sentence<SEP>into<SEP>shingles", 19, 39), createToken("into", 28, 32), createToken("into<SEP>shingles", 28, 39), createToken("shingles", 33, 39)};
    public static final int[] TRI_GRAM_POSITION_INCREMENTS_ALT_SEPARATOR = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1};
    public static final String[] TRI_GRAM_TYPES_ALT_SEPARATOR = {"word", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "word"};
    // Expected output for testTriGramFilterWithoutUnigramsAltSeparator: "<SEP>"-joined bi- and trigrams only.
    public static final Token[] TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS_ALT_SEPARATOR = {createToken("please<SEP>divide", 0, 13), createToken("please<SEP>divide<SEP>this", 0, 18), createToken("divide<SEP>this", 7, 18), createToken("divide<SEP>this<SEP>sentence", 7, 27), createToken("this<SEP>sentence", 14, 27), createToken("this<SEP>sentence<SEP>into", 14, 32), createToken("sentence<SEP>into", 19, 32), createToken("sentence<SEP>into<SEP>shingles", 19, 39), createToken("into<SEP>shingles", 28, 39)};
    public static final int[] TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_ALT_SEPARATOR = {1, 0, 1, 0, 1, 0, 1, 0, 1};
    public static final String[] TRI_GRAM_TYPES_WITHOUT_UNIGRAMS_ALT_SEPARATOR = {"shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle"};
    // Expected output for testTriGramFilterNullSeparator (called with a null String argument);
    // the expected terms are the same concatenated forms as in TRI_GRAM_TOKENS_NO_SEPARATOR.
    public static final Token[] TRI_GRAM_TOKENS_NULL_SEPARATOR = {createToken("please", 0, 6), createToken("pleasedivide", 0, 13), createToken("pleasedividethis", 0, 18), createToken("divide", 7, 13), createToken("dividethis", 7, 18), createToken("dividethissentence", 7, 27), createToken("this", 14, 18), createToken("thissentence", 14, 27), createToken("thissentenceinto", 14, 32), createToken("sentence", 19, 27), createToken("sentenceinto", 19, 32), createToken("sentenceintoshingles", 19, 39), createToken("into", 28, 32), createToken("intoshingles", 28, 39), createToken("shingles", 33, 39)};
    public static final int[] TRI_GRAM_POSITION_INCREMENTS_NULL_SEPARATOR = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1};
    public static final String[] TRI_GRAM_TYPES_NULL_SEPARATOR = {"word", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "word"};
    // Input for the testPositionIncrementEqualToN* tests. "sentence" is built with a fourth
    // createToken argument of 3 -- presumably its position increment (createToken is defined
    // outside this view; TODO confirm).
    public static final Token[] TEST_TOKEN_POS_INCR_EQUAL_TO_N = {createToken("please", 0, 6), createToken("divide", 7, 13), createToken("this", 14, 18), createToken("sentence", 29, 37, 3), createToken("into", 38, 42), createToken("shingles", 43, 49)};
    // Expected output for that input with shingle sizes (2, 3) and unigrams enabled
    // (testPositionIncrementEqualToN); "_" appears in the shingle text for the skipped positions.
    public static final Token[] TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N = {createToken("please", 0, 6), createToken("please divide", 0, 13), createToken("please divide this", 0, 18), createToken("divide", 7, 13), createToken("divide this", 7, 18), createToken("divide this _", 7, 29), createToken("this", 14, 18), createToken("this _", 14, 29), createToken("this _ _", 14, 29), createToken("_ _ sentence", 29, 37), createToken("_ sentence", 29, 37), createToken("_ sentence into", 29, 42), createToken("sentence", 29, 37), createToken("sentence into", 29, 42), createToken("sentence into shingles", 29, 49), createToken("into", 38, 42), createToken("into shingles", 38, 49), createToken("shingles", 43, 49)};
    public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1};
    public static final String[] TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N = {"word", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "shingle", "shingle", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "word"};
    // Expected output for TEST_TOKEN_POS_INCR_EQUAL_TO_N with shingle sizes (2, 3) and unigrams
    // disabled (testPositionIncrementEqualToNWithoutUnigrams). The three arrays are parallel:
    // one entry per expected shingle (twelve in total).
    public static final Token[] TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS = {createToken("please divide", 0, 13), createToken("please divide this", 0, 18), createToken("divide this", 7, 18), createToken("divide this _", 7, 29), createToken("this _", 14, 29), createToken("this _ _", 14, 29), createToken("_ _ sentence", 29, 37), createToken("_ sentence", 29, 37), createToken("_ sentence into", 29, 42), createToken("sentence into", 29, 42), createToken("sentence into shingles", 29, 49), createToken("into shingles", 38, 49)};
    public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS = {1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1};
    public static final String[] TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS = {"shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle"};
    // Input for the testPositionIncrementGreaterThanN* tests. "divide" is built with a fourth
    // createToken argument of 8 -- presumably its position increment (createToken is defined
    // outside this view; TODO confirm).
    public static final Token[] TEST_TOKEN_POS_INCR_GREATER_THAN_N = {createToken("please", 0, 6), createToken("divide", 57, 63, 8), createToken("this", 64, 68), createToken("sentence", 69, 77), createToken("into", 78, 82), createToken("shingles", 83, 89)};
    // Expected output for that input with shingle sizes (2, 3) and unigrams enabled
    // (testPositionIncrementGreaterThanN); at most two "_" fillers appear in any shingle.
    public static final Token[] TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N = {createToken("please", 0, 6), createToken("please _", 0, 57), createToken("please _ _", 0, 57), createToken("_ _ divide", 57, 63), createToken("_ divide", 57, 63), createToken("_ divide this", 57, 68), createToken("divide", 57, 63), createToken("divide this", 57, 68), createToken("divide this sentence", 57, 77), createToken("this", 64, 68), createToken("this sentence", 64, 77), createToken("this sentence into", 64, 82), createToken("sentence", 69, 77), createToken("sentence into", 69, 82), createToken("sentence into shingles", 69, 89), createToken("into", 78, 82), createToken("into shingles", 78, 89), createToken("shingles", 83, 89)};
    public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N = {1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1};
    public static final String[] TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N = {"word", "shingle", "shingle", "shingle", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "shingle", "word", "shingle", "word"};
    // Expected output for TEST_TOKEN_POS_INCR_GREATER_THAN_N with shingle sizes (2, 3) and
    // unigrams disabled (testPositionIncrementGreaterThanNWithoutUnigrams).
    public static final Token[] TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS = {createToken("please _", 0, 57), createToken("please _ _", 0, 57), createToken("_ _ divide", 57, 63), createToken("_ divide", 57, 63), createToken("_ divide this", 57, 68), createToken("divide this", 57, 68), createToken("divide this sentence", 57, 77), createToken("this sentence", 64, 77), createToken("this sentence into", 64, 82), createToken("sentence into", 69, 82), createToken("sentence into shingles", 69, 89), createToken("into shingles", 78, 89)};
    public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS = {1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1};
    public static final String[] TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS = {"shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle", "shingle"};

    /**
     * Rebuilds {@link #testTokenWithHoles} before every test, after running the inherited set-up.
     *
     * <p>The last two tokens are created with a fourth {@code createToken} argument of 2
     * (presumably a position increment, i.e. one skipped position before each of them;
     * {@code createToken} is defined elsewhere in this class).
     *
     * @throws Exception if the inherited set-up fails
     */
    @Override
    public void setUp() throws Exception {
        super.setUp();
        testTokenWithHoles = new Token[] {
            createToken("please", 0, 6),
            createToken("divide", 7, 13),
            createToken("sentence", 19, 27, 2),
            createToken("shingles", 33, 39, 2)
        };
    }

    /** Max shingle size 2 over {@code TEST_TOKEN} with unigrams enabled; expects {@code BI_GRAM_TOKENS}. */
    public void testBiGramFilter() throws IOException {
        final int maxShingleSize = 2;
        final boolean outputUnigrams = true;
        shingleFilterTest(
                maxShingleSize, TEST_TOKEN,
                BI_GRAM_TOKENS, BI_GRAM_POSITION_INCREMENTS, BI_GRAM_TYPES,
                outputUnigrams);
    }

    /** Max shingle size 2 over {@code testTokenWithHoles} with unigrams enabled. */
    public void testBiGramFilterWithHoles() throws IOException {
        final int maxShingleSize = 2;
        final boolean outputUnigrams = true;
        shingleFilterTest(
                maxShingleSize, testTokenWithHoles,
                BI_GRAM_TOKENS_WITH_HOLES, BI_GRAM_POSITION_INCREMENTS_WITH_HOLES, BI_GRAM_TYPES_WITH_HOLES,
                outputUnigrams);
    }

    /** Max shingle size 2 over {@code TEST_TOKEN} with unigrams disabled; expects bigrams only. */
    public void testBiGramFilterWithoutUnigrams() throws IOException {
        final int maxShingleSize = 2;
        final boolean outputUnigrams = false;
        shingleFilterTest(
                maxShingleSize, TEST_TOKEN,
                BI_GRAM_TOKENS_WITHOUT_UNIGRAMS, BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS, BI_GRAM_TYPES_WITHOUT_UNIGRAMS,
                outputUnigrams);
    }

    /**
     * Max shingle size 2 over {@code testTokenWithHoles} with unigrams disabled.
     * Reuses {@code BI_GRAM_TYPES_WITHOUT_UNIGRAMS} as the expected types.
     */
    public void testBiGramFilterWithHolesWithoutUnigrams() throws IOException {
        final int maxShingleSize = 2;
        final boolean outputUnigrams = false;
        shingleFilterTest(
                maxShingleSize, testTokenWithHoles,
                BI_GRAM_TOKENS_WITH_HOLES_WITHOUT_UNIGRAMS, BI_GRAM_POSITION_INCREMENTS_WITH_HOLES_WITHOUT_UNIGRAMS, BI_GRAM_TYPES_WITHOUT_UNIGRAMS,
                outputUnigrams);
    }

    /** Max shingle size 2 over a single-token input with unigrams enabled; expects that one unigram. */
    public void testBiGramFilterWithSingleToken() throws IOException {
        final int maxShingleSize = 2;
        final boolean outputUnigrams = true;
        shingleFilterTest(
                maxShingleSize, TEST_SINGLE_TOKEN,
                SINGLE_TOKEN, SINGLE_TOKEN_INCREMENTS, SINGLE_TOKEN_TYPES,
                outputUnigrams);
    }

    /** Max shingle size 2 over a single-token input with unigrams disabled; expects no output. */
    public void testBiGramFilterWithSingleTokenWithoutUnigrams() throws IOException {
        final int maxShingleSize = 2;
        final boolean outputUnigrams = false;
        shingleFilterTest(
                maxShingleSize, TEST_SINGLE_TOKEN,
                EMPTY_TOKEN_ARRAY, EMPTY_TOKEN_INCREMENTS_ARRAY, EMPTY_TOKEN_TYPES_ARRAY,
                outputUnigrams);
    }

    /** Max shingle size 2 over an empty input with unigrams enabled; expects no output. */
    public void testBiGramFilterWithEmptyTokenStream() throws IOException {
        final int maxShingleSize = 2;
        final boolean outputUnigrams = true;
        shingleFilterTest(
                maxShingleSize, EMPTY_TOKEN_ARRAY,
                EMPTY_TOKEN_ARRAY, EMPTY_TOKEN_INCREMENTS_ARRAY, EMPTY_TOKEN_TYPES_ARRAY,
                outputUnigrams);
    }

    /** Max shingle size 2 over an empty input with unigrams disabled; expects no output. */
    public void testBiGramFilterWithEmptyTokenStreamWithoutUnigrams() throws IOException {
        final int maxShingleSize = 2;
        final boolean outputUnigrams = false;
        shingleFilterTest(
                maxShingleSize, EMPTY_TOKEN_ARRAY,
                EMPTY_TOKEN_ARRAY, EMPTY_TOKEN_INCREMENTS_ARRAY, EMPTY_TOKEN_TYPES_ARRAY,
                outputUnigrams);
    }

    /** Max shingle size 3 over {@code TEST_TOKEN} with unigrams enabled; expects {@code TRI_GRAM_TOKENS}. */
    public void testTriGramFilter() throws IOException {
        final int maxShingleSize = 3;
        final boolean outputUnigrams = true;
        shingleFilterTest(
                maxShingleSize, TEST_TOKEN,
                TRI_GRAM_TOKENS, TRI_GRAM_POSITION_INCREMENTS, TRI_GRAM_TYPES,
                outputUnigrams);
    }

    /** Max shingle size 3 over {@code TEST_TOKEN} with unigrams disabled. */
    public void testTriGramFilterWithoutUnigrams() throws IOException {
        final int maxShingleSize = 3;
        final boolean outputUnigrams = false;
        shingleFilterTest(
                maxShingleSize, TEST_TOKEN,
                TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS, TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS, TRI_GRAM_TYPES_WITHOUT_UNIGRAMS,
                outputUnigrams);
    }

    /** Max shingle size 4 over {@code TEST_TOKEN} with unigrams enabled; expects {@code FOUR_GRAM_TOKENS}. */
    public void testFourGramFilter() throws IOException {
        final int maxShingleSize = 4;
        final boolean outputUnigrams = true;
        shingleFilterTest(
                maxShingleSize, TEST_TOKEN,
                FOUR_GRAM_TOKENS, FOUR_GRAM_POSITION_INCREMENTS, FOUR_GRAM_TYPES,
                outputUnigrams);
    }

    /** Max shingle size 4 over {@code TEST_TOKEN} with unigrams disabled. */
    public void testFourGramFilterWithoutUnigrams() throws IOException {
        final int maxShingleSize = 4;
        final boolean outputUnigrams = false;
        shingleFilterTest(
                maxShingleSize, TEST_TOKEN,
                FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS, FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS, FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS,
                outputUnigrams);
    }

    /** Shingle sizes (min 3, max 3) over {@code TEST_TOKEN} with unigrams enabled. */
    public void testTriGramFilterMinTriGram() throws IOException {
        final int minShingleSize = 3;
        final int maxShingleSize = 3;
        final boolean outputUnigrams = true;
        shingleFilterTest(
                minShingleSize, maxShingleSize, TEST_TOKEN,
                TRI_GRAM_TOKENS_MIN_TRI_GRAM, TRI_GRAM_POSITION_INCREMENTS_MIN_TRI_GRAM, TRI_GRAM_TYPES_MIN_TRI_GRAM,
                outputUnigrams);
    }

    /** Shingle sizes (min 3, max 3) over {@code TEST_TOKEN} with unigrams disabled; trigrams only. */
    public void testTriGramFilterWithoutUnigramsMinTriGram() throws IOException {
        final int minShingleSize = 3;
        final int maxShingleSize = 3;
        final boolean outputUnigrams = false;
        shingleFilterTest(
                minShingleSize, maxShingleSize, TEST_TOKEN,
                TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM, TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM, TRI_GRAM_TYPES_WITHOUT_UNIGRAMS_MIN_TRI_GRAM,
                outputUnigrams);
    }

    /** Shingle sizes (min 3, max 4) over {@code TEST_TOKEN} with unigrams enabled. */
    public void testFourGramFilterMinTriGram() throws IOException {
        final int minShingleSize = 3;
        final int maxShingleSize = 4;
        final boolean outputUnigrams = true;
        shingleFilterTest(
                minShingleSize, maxShingleSize, TEST_TOKEN,
                FOUR_GRAM_TOKENS_MIN_TRI_GRAM, FOUR_GRAM_POSITION_INCREMENTS_MIN_TRI_GRAM, FOUR_GRAM_TYPES_MIN_TRI_GRAM,
                outputUnigrams);
    }

    /** Shingle sizes (min 3, max 4) over {@code TEST_TOKEN} with unigrams disabled. */
    public void testFourGramFilterWithoutUnigramsMinTriGram() throws IOException {
        final int minShingleSize = 3;
        final int maxShingleSize = 4;
        final boolean outputUnigrams = false;
        shingleFilterTest(
                minShingleSize, maxShingleSize, TEST_TOKEN,
                FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM, FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM, FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS_MIN_TRI_GRAM,
                outputUnigrams);
    }

    /** Shingle sizes (min 4, max 4) over {@code TEST_TOKEN} with unigrams enabled. */
    public void testFourGramFilterMinFourGram() throws IOException {
        final int minShingleSize = 4;
        final int maxShingleSize = 4;
        final boolean outputUnigrams = true;
        shingleFilterTest(
                minShingleSize, maxShingleSize, TEST_TOKEN,
                FOUR_GRAM_TOKENS_MIN_FOUR_GRAM, FOUR_GRAM_POSITION_INCREMENTS_MIN_FOUR_GRAM, FOUR_GRAM_TYPES_MIN_FOUR_GRAM,
                outputUnigrams);
    }

    /** Shingle sizes (min 4, max 4) over {@code TEST_TOKEN} with unigrams disabled; four-grams only. */
    public void testFourGramFilterWithoutUnigramsMinFourGram() throws IOException {
        final int minShingleSize = 4;
        final int maxShingleSize = 4;
        final boolean outputUnigrams = false;
        shingleFilterTest(
                minShingleSize, maxShingleSize, TEST_TOKEN,
                FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS_MIN_FOUR_GRAM, FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_MIN_FOUR_GRAM, FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS_MIN_FOUR_GRAM,
                outputUnigrams);
    }

    /**
     * Shingle sizes (2, 2) over {@code TEST_TOKEN}, unigrams enabled, with an empty string as the
     * leading String argument (the token separator, judging by the expected concatenated terms).
     */
    public void testBiGramFilterNoSeparator() throws IOException {
        final String tokenSeparator = "";
        final int minShingleSize = 2;
        final int maxShingleSize = 2;
        final boolean outputUnigrams = true;
        shingleFilterTest(
                tokenSeparator, minShingleSize, maxShingleSize, TEST_TOKEN,
                BI_GRAM_TOKENS_NO_SEPARATOR, BI_GRAM_POSITION_INCREMENTS_NO_SEPARATOR, BI_GRAM_TYPES_NO_SEPARATOR,
                outputUnigrams);
    }

    /** Shingle sizes (2, 2) over {@code TEST_TOKEN}, unigrams disabled, empty-string separator argument. */
    public void testBiGramFilterWithoutUnigramsNoSeparator() throws IOException {
        final String tokenSeparator = "";
        final int minShingleSize = 2;
        final int maxShingleSize = 2;
        final boolean outputUnigrams = false;
        shingleFilterTest(
                tokenSeparator, minShingleSize, maxShingleSize, TEST_TOKEN,
                BI_GRAM_TOKENS_WITHOUT_UNIGRAMS_NO_SEPARATOR, BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_NO_SEPARATOR, BI_GRAM_TYPES_WITHOUT_UNIGRAMS_NO_SEPARATOR,
                outputUnigrams);
    }

    /** Shingle sizes (2, 3) over {@code TEST_TOKEN}, unigrams enabled, empty-string separator argument. */
    public void testTriGramFilterNoSeparator() throws IOException {
        final String tokenSeparator = "";
        final int minShingleSize = 2;
        final int maxShingleSize = 3;
        final boolean outputUnigrams = true;
        shingleFilterTest(
                tokenSeparator, minShingleSize, maxShingleSize, TEST_TOKEN,
                TRI_GRAM_TOKENS_NO_SEPARATOR, TRI_GRAM_POSITION_INCREMENTS_NO_SEPARATOR, TRI_GRAM_TYPES_NO_SEPARATOR,
                outputUnigrams);
    }

    /** Shingle sizes (2, 3) over {@code TEST_TOKEN}, unigrams disabled, empty-string separator argument. */
    public void testTriGramFilterWithoutUnigramsNoSeparator() throws IOException {
        final String tokenSeparator = "";
        final int minShingleSize = 2;
        final int maxShingleSize = 3;
        final boolean outputUnigrams = false;
        shingleFilterTest(
                tokenSeparator, minShingleSize, maxShingleSize, TEST_TOKEN,
                TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS_NO_SEPARATOR, TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_NO_SEPARATOR, TRI_GRAM_TYPES_WITHOUT_UNIGRAMS_NO_SEPARATOR,
                outputUnigrams);
    }

    /**
     * Shingle sizes (2, 2) over {@code TEST_TOKEN}, unigrams enabled, with {@code "<SEP>"} as the
     * leading String argument (the token separator, judging by the expected terms).
     */
    public void testBiGramFilterAltSeparator() throws IOException {
        final String tokenSeparator = "<SEP>";
        final int minShingleSize = 2;
        final int maxShingleSize = 2;
        final boolean outputUnigrams = true;
        shingleFilterTest(
                tokenSeparator, minShingleSize, maxShingleSize, TEST_TOKEN,
                BI_GRAM_TOKENS_ALT_SEPARATOR, BI_GRAM_POSITION_INCREMENTS_ALT_SEPARATOR, BI_GRAM_TYPES_ALT_SEPARATOR,
                outputUnigrams);
    }

    /** Shingle sizes (2, 2) over {@code TEST_TOKEN}, unigrams disabled, {@code "<SEP>"} separator argument. */
    public void testBiGramFilterWithoutUnigramsAltSeparator() throws IOException {
        final String tokenSeparator = "<SEP>";
        final int minShingleSize = 2;
        final int maxShingleSize = 2;
        final boolean outputUnigrams = false;
        shingleFilterTest(
                tokenSeparator, minShingleSize, maxShingleSize, TEST_TOKEN,
                BI_GRAM_TOKENS_WITHOUT_UNIGRAMS_ALT_SEPARATOR, BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_ALT_SEPARATOR, BI_GRAM_TYPES_WITHOUT_UNIGRAMS_ALT_SEPARATOR,
                outputUnigrams);
    }

    /** Shingle sizes (2, 3) over {@code TEST_TOKEN}, unigrams enabled, {@code "<SEP>"} separator argument. */
    public void testTriGramFilterAltSeparator() throws IOException {
        final String tokenSeparator = "<SEP>";
        final int minShingleSize = 2;
        final int maxShingleSize = 3;
        final boolean outputUnigrams = true;
        shingleFilterTest(
                tokenSeparator, minShingleSize, maxShingleSize, TEST_TOKEN,
                TRI_GRAM_TOKENS_ALT_SEPARATOR, TRI_GRAM_POSITION_INCREMENTS_ALT_SEPARATOR, TRI_GRAM_TYPES_ALT_SEPARATOR,
                outputUnigrams);
    }

    /** Shingle sizes (2, 3) over {@code TEST_TOKEN}, unigrams disabled, {@code "<SEP>"} separator argument. */
    public void testTriGramFilterWithoutUnigramsAltSeparator() throws IOException {
        final String tokenSeparator = "<SEP>";
        final int minShingleSize = 2;
        final int maxShingleSize = 3;
        final boolean outputUnigrams = false;
        shingleFilterTest(
                tokenSeparator, minShingleSize, maxShingleSize, TEST_TOKEN,
                TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS_ALT_SEPARATOR, TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_ALT_SEPARATOR, TRI_GRAM_TYPES_WITHOUT_UNIGRAMS_ALT_SEPARATOR,
                outputUnigrams);
    }

    /**
     * Shingle sizes (2, 3) over {@code TEST_TOKEN}, unigrams enabled, with {@code null} as the
     * leading String argument; the expected terms are the concatenated (separator-less) forms.
     */
    public void testTriGramFilterNullSeparator() throws IOException {
        // Typed as String so the String-first overload is selected, as with the original cast.
        final String tokenSeparator = null;
        final int minShingleSize = 2;
        final int maxShingleSize = 3;
        final boolean outputUnigrams = true;
        shingleFilterTest(
                tokenSeparator, minShingleSize, maxShingleSize, TEST_TOKEN,
                TRI_GRAM_TOKENS_NULL_SEPARATOR, TRI_GRAM_POSITION_INCREMENTS_NULL_SEPARATOR, TRI_GRAM_TYPES_NULL_SEPARATOR,
                outputUnigrams);
    }

    /** Shingle sizes (2, 3) over {@code TEST_TOKEN_POS_INCR_EQUAL_TO_N} with unigrams enabled. */
    public void testPositionIncrementEqualToN() throws IOException {
        final int minShingleSize = 2;
        final int maxShingleSize = 3;
        final boolean outputUnigrams = true;
        shingleFilterTest(
                minShingleSize, maxShingleSize, TEST_TOKEN_POS_INCR_EQUAL_TO_N,
                TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N, TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N, TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N,
                outputUnigrams);
    }

    /** Shingle sizes (2, 3) over {@code TEST_TOKEN_POS_INCR_EQUAL_TO_N} with unigrams disabled. */
    public void testPositionIncrementEqualToNWithoutUnigrams() throws IOException {
        final int minShingleSize = 2;
        final int maxShingleSize = 3;
        final boolean outputUnigrams = false;
        shingleFilterTest(
                minShingleSize, maxShingleSize, TEST_TOKEN_POS_INCR_EQUAL_TO_N,
                TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS, TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS, TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS,
                outputUnigrams);
    }

    /** Shingle sizes (2, 3) over {@code TEST_TOKEN_POS_INCR_GREATER_THAN_N} with unigrams enabled. */
    public void testPositionIncrementGreaterThanN() throws IOException {
        final int minShingleSize = 2;
        final int maxShingleSize = 3;
        final boolean outputUnigrams = true;
        shingleFilterTest(
                minShingleSize, maxShingleSize, TEST_TOKEN_POS_INCR_GREATER_THAN_N,
                TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N, TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N, TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N,
                outputUnigrams);
    }

    /** Shingle sizes (2, 3) over {@code TEST_TOKEN_POS_INCR_GREATER_THAN_N} with unigrams disabled. */
    public void testPositionIncrementGreaterThanNWithoutUnigrams() throws IOException {
        final int minShingleSize = 2;
        final int maxShingleSize = 3;
        final boolean outputUnigrams = false;
        shingleFilterTest(
                minShingleSize, maxShingleSize, TEST_TOKEN_POS_INCR_GREATER_THAN_N,
                TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS, TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS, TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS,
                outputUnigrams);
    }

    /**
     * Consumes the same {@code ShingleFilter} twice, handing the underlying tokenizer a fresh
     * reader over the same text before each pass, and expects identical output both times.
     *
     * @throws Exception if tokenization or an assertion helper fails
     */
    public void testReset() throws Exception {
        final String text = "please divide this sentence";
        final String[] expectedTerms = {"please", "please divide", "divide", "divide this", "this", "this sentence", "sentence"};
        final int[] expectedStartOffsets = {0, 0, 7, 7, 14, 14, 19};
        final int[] expectedEndOffsets = {6, 13, 13, 18, 18, 27, 27};
        final String[] expectedTypes = {"word", "shingle", "word", "shingle", "word", "shingle", "word"};
        final int[] expectedPositionIncrements = {1, 0, 1, 0, 1, 0, 1};

        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader(text));
        ShingleFilter filter = new ShingleFilter(tokenizer, 2);

        // First pass.
        assertTokenStreamContents(filter, expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedTypes, expectedPositionIncrements);

        // Second pass over the very same filter instance, with a new reader on the tokenizer.
        tokenizer.setReader(new StringReader(text));
        assertTokenStreamContents(filter, expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedTypes, expectedPositionIncrements);
    }

    /**
     * Shingle sizes (2, 2) over a single-token input with unigrams disabled but
     * output-unigrams-if-no-shingles enabled; expects the lone unigram.
     */
    public void testOutputUnigramsIfNoShinglesSingleTokenCase() throws IOException {
        final int minShingleSize = 2;
        final int maxShingleSize = 2;
        final boolean outputUnigrams = false;
        final boolean outputUnigramsIfNoShingles = true;
        shingleFilterTest(
                minShingleSize, maxShingleSize, TEST_SINGLE_TOKEN,
                SINGLE_TOKEN, SINGLE_TOKEN_INCREMENTS, SINGLE_TOKEN_TYPES,
                outputUnigrams, outputUnigramsIfNoShingles);
    }

    /**
     * Shingle sizes (2, 2) over {@code TEST_TOKEN} with both unigram flags enabled; expects the
     * same output as the plain bigram case ({@code BI_GRAM_TOKENS}).
     */
    public void testOutputUnigramsIfNoShinglesWithSimpleBigram() throws IOException {
        final int minShingleSize = 2;
        final int maxShingleSize = 2;
        final boolean outputUnigrams = true;
        final boolean outputUnigramsIfNoShingles = true;
        shingleFilterTest(
                minShingleSize, maxShingleSize, TEST_TOKEN,
                BI_GRAM_TOKENS, BI_GRAM_POSITION_INCREMENTS, BI_GRAM_TYPES,
                outputUnigrams, outputUnigramsIfNoShingles);
    }

    /**
     * Bigram configuration (sizes 2 and 2) with unigrams off and the "unigrams if no shingles"
     * switch on, run over {@code TEST_TOKEN}; the expected output is the
     * {@code BI_GRAM_*_WITHOUT_UNIGRAMS} fixture set.
     *
     * @throws IOException if the token stream fails while being consumed
     */
    public void testOutputUnigramsIfNoShinglesWithSimpleUnigramlessBigram() throws IOException {
        final int shingleSize = 2;
        final boolean outputUnigrams = false;
        final boolean outputUnigramsIfNoShingles = true;
        shingleFilterTest(
                shingleSize,
                shingleSize,
                TEST_TOKEN,
                BI_GRAM_TOKENS_WITHOUT_UNIGRAMS,
                BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS,
                BI_GRAM_TYPES_WITHOUT_UNIGRAMS,
                outputUnigrams,
                outputUnigramsIfNoShingles);
    }

    /**
     * Uses sizes 7 and 7 on the six-token {@code TEST_TOKEN} input with unigrams off and the
     * "unigrams if no shingles" switch on; the expected output is the unchanged input tokens
     * together with the {@code UNIGRAM_ONLY_*} increments and types.
     *
     * @throws IOException if the token stream fails while being consumed
     */
    public void testOutputUnigramsIfNoShinglesWithMultipleInputTokens() throws IOException {
        final int shingleSize = 7;
        final boolean outputUnigrams = false;
        final boolean outputUnigramsIfNoShingles = true;
        shingleFilterTest(
                shingleSize,
                shingleSize,
                TEST_TOKEN,
                TEST_TOKEN,
                UNIGRAM_ONLY_POSITION_INCREMENTS,
                UNIGRAM_ONLY_TYPES,
                outputUnigrams,
                outputUnigramsIfNoShingles);
    }

    /**
     * Replays {@code tokenArr} through a {@link ShingleFilter} built with the single size argument
     * {@code i} and verifies the emitted tokens.
     *
     * @param i size argument handed to the two-argument {@link ShingleFilter} constructor
     * @param tokenArr tokens fed into the filter
     * @param tokenArr2 expected output tokens (their text and offsets are compared)
     * @param iArr expected position increments
     * @param strArr expected token types
     * @param z value passed to {@code setOutputUnigrams}
     * @throws IOException if the token stream fails while being consumed
     */
    protected void shingleFilterTest(int i, Token[] tokenArr, Token[] tokenArr2, int[] iArr, String[] strArr, boolean z) throws IOException {
        CannedTokenStream input = new CannedTokenStream(tokenArr);
        ShingleFilter filterUnderTest = new ShingleFilter(input, i);
        filterUnderTest.setOutputUnigrams(z);

        shingleFilterTestCommon(filterUnderTest, tokenArr2, iArr, strArr);
    }

    /**
     * Replays {@code tokenArr} through a {@link ShingleFilter} built with the two size arguments
     * {@code i} and {@code i2} and verifies the emitted tokens.
     *
     * @param i first size argument of the three-argument {@link ShingleFilter} constructor
     * @param i2 second size argument of that constructor
     * @param tokenArr tokens fed into the filter
     * @param tokenArr2 expected output tokens (their text and offsets are compared)
     * @param iArr expected position increments
     * @param strArr expected token types
     * @param z value passed to {@code setOutputUnigrams}
     * @throws IOException if the token stream fails while being consumed
     */
    protected void shingleFilterTest(int i, int i2, Token[] tokenArr, Token[] tokenArr2, int[] iArr, String[] strArr, boolean z) throws IOException {
        CannedTokenStream input = new CannedTokenStream(tokenArr);
        ShingleFilter filterUnderTest = new ShingleFilter(input, i, i2);
        filterUnderTest.setOutputUnigrams(z);

        shingleFilterTestCommon(filterUnderTest, tokenArr2, iArr, strArr);
    }

    /**
     * Variant of the two-size check that additionally configures the "unigrams if no shingles"
     * switch before verifying the emitted tokens.
     *
     * @param i first size argument of the three-argument {@link ShingleFilter} constructor
     * @param i2 second size argument of that constructor
     * @param tokenArr tokens fed into the filter
     * @param tokenArr2 expected output tokens (their text and offsets are compared)
     * @param iArr expected position increments
     * @param strArr expected token types
     * @param z value passed to {@code setOutputUnigrams}
     * @param z2 value passed to {@code setOutputUnigramsIfNoShingles}
     * @throws IOException if the token stream fails while being consumed
     */
    protected void shingleFilterTest(int i, int i2, Token[] tokenArr, Token[] tokenArr2, int[] iArr, String[] strArr, boolean z, boolean z2) throws IOException {
        CannedTokenStream input = new CannedTokenStream(tokenArr);
        ShingleFilter filterUnderTest = new ShingleFilter(input, i, i2);
        filterUnderTest.setOutputUnigrams(z);
        filterUnderTest.setOutputUnigramsIfNoShingles(z2);

        shingleFilterTestCommon(filterUnderTest, tokenArr2, iArr, strArr);
    }

    /**
     * Variant of the two-size check that sets a custom token separator on the filter before
     * verifying the emitted tokens.
     *
     * @param str value passed to {@code setTokenSeparator}
     * @param i first size argument of the three-argument {@link ShingleFilter} constructor
     * @param i2 second size argument of that constructor
     * @param tokenArr tokens fed into the filter
     * @param tokenArr2 expected output tokens (their text and offsets are compared)
     * @param iArr expected position increments
     * @param strArr expected token types
     * @param z value passed to {@code setOutputUnigrams}
     * @throws IOException if the token stream fails while being consumed
     */
    protected void shingleFilterTest(String str, int i, int i2, Token[] tokenArr, Token[] tokenArr2, int[] iArr, String[] strArr, boolean z) throws IOException {
        CannedTokenStream input = new CannedTokenStream(tokenArr);
        ShingleFilter filterUnderTest = new ShingleFilter(input, i, i2);
        filterUnderTest.setTokenSeparator(str);
        filterUnderTest.setOutputUnigrams(z);

        shingleFilterTestCommon(filterUnderTest, tokenArr2, iArr, strArr);
    }

    /**
     * Unpacks the expected tokens into parallel arrays of term text, start offset and end offset,
     * then asserts that {@code shingleFilter} emits exactly those values together with the given
     * types and position increments.
     *
     * @param shingleFilter fully configured filter to consume
     * @param tokenArr expected tokens; only their term text and offsets are read
     * @param iArr expected position increments
     * @param strArr expected token types
     * @throws IOException if the token stream fails while being consumed
     */
    protected void shingleFilterTestCommon(ShingleFilter shingleFilter, Token[] tokenArr, int[] iArr, String[] strArr) throws IOException {
        final int expectedCount = tokenArr.length;
        String[] expectedTerms = new String[expectedCount];
        int[] expectedStartOffsets = new int[expectedCount];
        int[] expectedEndOffsets = new int[expectedCount];

        int slot = 0;
        for (Token expected : tokenArr) {
            // Only the first length() chars of the term buffer belong to the term.
            expectedTerms[slot] = String.valueOf(expected.buffer(), 0, expected.length());
            expectedStartOffsets[slot] = expected.startOffset();
            expectedEndOffsets[slot] = expected.endOffset();
            slot++;
        }

        assertTokenStreamContents(shingleFilter, expectedTerms, expectedStartOffsets, expectedEndOffsets, strArr, iArr);
    }

    /**
     * Convenience overload that builds a token with a position increment of 1.
     *
     * @param str term text
     * @param i start offset
     * @param i2 end offset
     * @return the newly created token
     */
    private static Token createToken(String str, int i, int i2) {
        final int positionIncrement = 1;
        return createToken(str, i, i2, positionIncrement);
    }

    /**
     * Builds a fresh {@link Token} carrying the given text, offsets and position increment.
     *
     * @param str term text copied into the token's buffer
     * @param i start offset
     * @param i2 end offset
     * @param i3 position increment
     * @return the newly created token
     */
    private static Token createToken(String str, int i, int i2, int i3) {
        final Token result = new Token();
        result.setOffset(i, i2);
        final char[] text = str.toCharArray();
        result.copyBuffer(text, 0, text.length);
        result.setPositionIncrement(i3);
        return result;
    }

    /**
     * Runs the framework's randomized consistency check ({@code checkRandomData}) against an
     * analyzer made of a whitespace {@link MockTokenizer} followed by a default-configured
     * {@link ShingleFilter}.
     *
     * @throws Exception if the randomized check fails
     */
    public void testRandomStrings() throws Exception {
        // try-with-resources: the analyzer is closed even when checkRandomData throws,
        // instead of only on the success path.
        try (Analyzer analyzer = new Analyzer() {
            @Override
            protected Analyzer.TokenStreamComponents createComponents(String str) {
                MockTokenizer mockTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                return new Analyzer.TokenStreamComponents(mockTokenizer, new ShingleFilter(mockTokenizer));
            }
        }) {
            checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
        }
    }

    /**
     * Same randomized consistency check as {@code testRandomStrings}, but with fewer iterations
     * ({@code 100 * RANDOM_MULTIPLIER}) and 8192 passed as the extra size argument of
     * {@code checkRandomData}.
     *
     * @throws Exception if the randomized check fails
     */
    public void testRandomHugeStrings() throws Exception {
        // Obtained before the analyzer is created, as in the original ordering.
        Random random = random();
        // try-with-resources: the analyzer is closed even when checkRandomData throws,
        // instead of only on the success path.
        try (Analyzer analyzer = new Analyzer() {
            @Override
            protected Analyzer.TokenStreamComponents createComponents(String str) {
                MockTokenizer mockTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                return new Analyzer.TokenStreamComponents(mockTokenizer, new ShingleFilter(mockTokenizer));
            }
        }) {
            checkRandomData(random, analyzer, 100 * RANDOM_MULTIPLIER, 8192);
        }
    }

    /**
     * Checks that an empty input analyzed by a {@link KeywordTokenizer} followed by a
     * default-configured {@link ShingleFilter} yields a single empty term.
     *
     * @throws IOException if the token stream fails while being consumed
     */
    public void testEmptyTerm() throws IOException {
        // try-with-resources: the analyzer is closed even when checkOneTerm throws,
        // instead of only on the success path.
        try (Analyzer analyzer = new Analyzer() {
            @Override
            protected Analyzer.TokenStreamComponents createComponents(String str) {
                KeywordTokenizer keywordTokenizer = new KeywordTokenizer();
                return new Analyzer.TokenStreamComponents(keywordTokenizer, new ShingleFilter(keywordTokenizer));
            }
        }) {
            checkOneTerm(analyzer, "", "");
        }
    }

    /**
     * Single input token "wizard" on a canned stream constructed with leading arguments 1 and 9
     * (presumably final position increment and final offset; confirm against CannedTokenStream).
     * With sizes 2 and 2 the filter is expected to emit the unigram plus a {@code "wizard _"}
     * shingle that ends at offset 9.
     *
     * @throws IOException if the token stream fails while being consumed
     */
    public void testTrailingHole1() throws IOException {
        final int finalOffset = 9;
        Token[] inputTokens = {createToken("wizard", 0, 6)};
        CannedTokenStream input = new CannedTokenStream(1, finalOffset, inputTokens);
        ShingleFilter bigrams = new ShingleFilter(input, 2, 2);

        String[] expectedTerms = {"wizard", "wizard _"};
        int[] expectedStartOffsets = {0, 0};
        int[] expectedEndOffsets = {6, finalOffset};
        int[] expectedPosIncrements = {1, 0};

        assertTokenStreamContents(bigrams, expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedPosIncrements, finalOffset);
    }

    /**
     * Two input tokens ("purple", "wizard") on a canned stream constructed with leading arguments
     * 1 and 16. With sizes 2 and 2 the filter is expected to emit both unigrams, the
     * {@code "purple wizard"} bigram and a trailing {@code "wizard _"} shingle ending at offset 16.
     *
     * @throws IOException if the token stream fails while being consumed
     */
    public void testTrailingHole2() throws IOException {
        final int finalOffset = 16;
        Token[] inputTokens = {createToken("purple", 0, 6), createToken("wizard", 7, 13)};
        CannedTokenStream input = new CannedTokenStream(1, finalOffset, inputTokens);
        ShingleFilter bigrams = new ShingleFilter(input, 2, 2);

        String[] expectedTerms = {"purple", "purple wizard", "wizard", "wizard _"};
        int[] expectedStartOffsets = {0, 0, 7, 7};
        int[] expectedEndOffsets = {6, 13, 13, finalOffset};
        int[] expectedPosIncrements = {1, 0, 1, 0};

        assertTokenStreamContents(bigrams, expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedPosIncrements, finalOffset);
    }

    /**
     * Two input tokens ("purple", "wizard") on a canned stream constructed with leading arguments
     * 2 and 20. With sizes 2 and 2 the expected output still contains only one filler shingle,
     * {@code "wizard _"}, ending at offset 20.
     *
     * @throws IOException if the token stream fails while being consumed
     */
    public void testTwoTrailingHoles() throws IOException {
        final int finalOffset = 20;
        Token[] inputTokens = {createToken("purple", 0, 6), createToken("wizard", 7, 13)};
        CannedTokenStream input = new CannedTokenStream(2, finalOffset, inputTokens);
        ShingleFilter bigrams = new ShingleFilter(input, 2, 2);

        String[] expectedTerms = {"purple", "purple wizard", "wizard", "wizard _"};
        int[] expectedStartOffsets = {0, 0, 7, 7};
        int[] expectedEndOffsets = {6, 13, 13, finalOffset};
        int[] expectedPosIncrements = {1, 0, 1, 0};

        assertTokenStreamContents(bigrams, expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedPosIncrements, finalOffset);
    }

    /**
     * Same canned input as {@code testTwoTrailingHoles} (leading arguments 2 and 20) but with
     * sizes 2 and 3, so the expected output also contains the three-part shingles
     * {@code "purple wizard _"} and {@code "wizard _ _"}.
     *
     * @throws IOException if the token stream fails while being consumed
     */
    public void testTwoTrailingHolesTriShingle() throws IOException {
        final int finalOffset = 20;
        Token[] inputTokens = {createToken("purple", 0, 6), createToken("wizard", 7, 13)};
        CannedTokenStream input = new CannedTokenStream(2, finalOffset, inputTokens);
        ShingleFilter shingles = new ShingleFilter(input, 2, 3);

        String[] expectedTerms = {"purple", "purple wizard", "purple wizard _", "wizard", "wizard _", "wizard _ _"};
        int[] expectedStartOffsets = {0, 0, 0, 7, 7, 7};
        int[] expectedEndOffsets = {6, 13, finalOffset, 13, finalOffset, finalOffset};
        int[] expectedPosIncrements = {1, 0, 0, 1, 0, 0};

        assertTokenStreamContents(shingles, expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedPosIncrements, finalOffset);
    }

    /**
     * Repeats the two-trailing-holes tri-shingle scenario with four filler/separator settings:
     * filler {@code "--"}, filler {@code ""}, filler {@code null}, and filler {@code null} combined
     * with separator {@code null}. Offsets and position increments are expected to be the same in
     * all four runs; only the term text differs.
     *
     * @throws IOException if the token stream fails while being consumed
     */
    public void testTwoTrailingHolesTriShingleWithTokenFiller() throws IOException {
        final int finalOffset = 20;
        Token[] tokenArr = {createToken("purple", 0, 6), createToken("wizard", 7, 13)};

        // Shared by every scenario below.
        final int[] expectedStartOffsets = {0, 0, 0, 7, 7, 7};
        final int[] expectedEndOffsets = {6, 13, 20, 13, 20, 20};
        final int[] expectedPosIncrements = {1, 0, 0, 1, 0, 0};

        // The expected terms are the same for an empty and a null filler.
        final String[] dashFillerTerms = {"purple", "purple wizard", "purple wizard --", "wizard", "wizard --", "wizard -- --"};
        final String[] blankFillerTerms = {"purple", "purple wizard", "purple wizard ", "wizard", "wizard ", "wizard  "};
        final String[] noSeparatorTerms = {"purple", "purplewizard", "purplewizard", "wizard", "wizard", "wizard"};

        // 1) custom filler token
        ShingleFilter dashFiller = new ShingleFilter(new CannedTokenStream(2, finalOffset, tokenArr), 2, 3);
        dashFiller.setFillerToken("--");
        assertTokenStreamContents(dashFiller, dashFillerTerms, expectedStartOffsets, expectedEndOffsets, expectedPosIncrements, finalOffset);

        // 2) empty filler token
        ShingleFilter emptyFiller = new ShingleFilter(new CannedTokenStream(2, finalOffset, tokenArr), 2, 3);
        emptyFiller.setFillerToken("");
        assertTokenStreamContents(emptyFiller, blankFillerTerms, expectedStartOffsets, expectedEndOffsets, expectedPosIncrements, finalOffset);

        // 3) null filler token
        ShingleFilter nullFiller = new ShingleFilter(new CannedTokenStream(2, finalOffset, tokenArr), 2, 3);
        nullFiller.setFillerToken((String) null);
        assertTokenStreamContents(nullFiller, blankFillerTerms, expectedStartOffsets, expectedEndOffsets, expectedPosIncrements, finalOffset);

        // 4) null filler token and null separator
        ShingleFilter nullFillerAndSeparator = new ShingleFilter(new CannedTokenStream(2, finalOffset, tokenArr), 2, 3);
        nullFillerAndSeparator.setFillerToken((String) null);
        nullFillerAndSeparator.setTokenSeparator((String) null);
        assertTokenStreamContents(nullFillerAndSeparator, noSeparatorTerms, expectedStartOffsets, expectedEndOffsets, expectedPosIncrements, finalOffset);
    }

    /**
     * Verifies terms, offsets, position increments and position lengths of unigram-less shingles
     * over the text {@code "to be or not to be"} for four size configurations: (4, 4), (2, 4),
     * (3, 4) and (3, 5).
     *
     * <p>Each analyzer is closed after use, and every expected array has exactly one entry per
     * expected term.
     *
     * @throws Exception if analysis or any assertion helper fails
     */
    public void testPositionLength() throws Exception {
        // The trailing 'false' on each assertion is kept from the original call; presumably it
        // relaxes the helper's offset-consistency checking (confirm against BaseTokenStreamTestCase).

        // sizes 4..4
        try (Analyzer analyzer = newUnigramlessShingleAnalyzer(4, 4)) {
            assertTokenStreamContents(analyzer.tokenStream("", "to be or not to be"),
                    new String[]{"to be or not", "be or not to", "or not to be"},
                    new int[]{0, 3, 6},
                    new int[]{12, 15, 18},
                    null,
                    new int[]{1, 1, 1},
                    new int[]{1, 1, 1},
                    18,
                    false);
        }

        // sizes 2..4
        try (Analyzer analyzer = newUnigramlessShingleAnalyzer(2, 4)) {
            assertTokenStreamContents(analyzer.tokenStream("", "to be or not to be"),
                    new String[]{"to be", "to be or", "to be or not", "be or", "be or not", "be or not to", "or not", "or not to", "or not to be", "not to", "not to be", "to be"},
                    new int[]{0, 0, 0, 3, 3, 3, 6, 6, 6, 9, 9, 13},
                    new int[]{5, 8, 12, 8, 12, 15, 12, 15, 18, 15, 18, 18},
                    null,
                    new int[]{1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1},
                    new int[]{1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 1},
                    18,
                    false);
        }

        // sizes 3..4 (seven expected terms)
        try (Analyzer analyzer = newUnigramlessShingleAnalyzer(3, 4)) {
            assertTokenStreamContents(analyzer.tokenStream("", "to be or not to be"),
                    new String[]{"to be or", "to be or not", "be or not", "be or not to", "or not to", "or not to be", "not to be"},
                    new int[]{0, 0, 3, 3, 6, 6, 9},
                    new int[]{8, 12, 12, 15, 15, 18, 18},
                    null,
                    new int[]{1, 0, 1, 0, 1, 0, 1},
                    new int[]{1, 2, 1, 2, 1, 2, 1},
                    18,
                    false);
        }

        // sizes 3..5 (nine expected terms)
        try (Analyzer analyzer = newUnigramlessShingleAnalyzer(3, 5)) {
            assertTokenStreamContents(analyzer.tokenStream("", "to be or not to be"),
                    new String[]{"to be or", "to be or not", "to be or not to", "be or not", "be or not to", "be or not to be", "or not to", "or not to be", "not to be"},
                    new int[]{0, 0, 0, 3, 3, 3, 6, 6, 9},
                    new int[]{8, 12, 15, 12, 15, 18, 15, 18, 18},
                    null,
                    new int[]{1, 0, 0, 1, 0, 0, 1, 0, 1},
                    new int[]{1, 2, 3, 1, 2, 3, 1, 2, 1},
                    18,
                    false);
        }
    }

    /**
     * Creates an analyzer that tokenizes on whitespace with a {@link MockTokenizer} and applies a
     * {@link ShingleFilter} built with the two given size arguments and unigram output disabled.
     *
     * @param minShingleSize first size argument of the {@link ShingleFilter} constructor
     * @param maxShingleSize second size argument of the {@link ShingleFilter} constructor
     * @return a new analyzer; the caller is responsible for closing it
     */
    private static Analyzer newUnigramlessShingleAnalyzer(final int minShingleSize, final int maxShingleSize) {
        return new Analyzer() {
            @Override
            protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
                MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                ShingleFilter shingles = new ShingleFilter(tokenizer, minShingleSize, maxShingleSize);
                shingles.setOutputUnigrams(false);
                return new Analyzer.TokenStreamComponents(tokenizer, shingles);
            }
        };
    }
}
