package org.apache.lucene.analysis.charfilter;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.util.TestUtil;

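/**
 * Tests for {@link HTMLStripCharFilter}: stripping of tags, comments, CDATA sections and processing
 * instructions, decoding of character entities, preservation of escaped tags, offset correction,
 * and randomized consistency checks.
 *
 * <p>Recovered from org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.class.
 */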
public class HTMLStripCharFilterTest extends BaseTokenStreamTestCase {
    /**
     * Builds the analyzer shared by the analyzer-level tests: input is wrapped in an
     * {@link HTMLStripCharFilter} and then split by a whitespace {@link MockTokenizer}.
     *
     * @return a new analyzer; callers are responsible for closing it
     */
    private static Analyzer newTestAnalyzer() {
        return new Analyzer() {
            @Override
            protected Analyzer.TokenStreamComponents createComponents(String str) {
                MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                // The tokenizer serves as both the source and the end of the token stream chain.
                return new Analyzer.TokenStreamComponents(tokenizer, tokenizer);
            }

            @Override
            protected Reader initReader(String str, Reader reader) {
                return new HTMLStripCharFilter(reader);
            }
        };
    }

    /**
     * Smoke test on a small snippet: the expected text has tags and the comment removed,
     * {@code &amp;}/{@code &lt;} decoded, and a bare {@code &} left as is.
     */
    public void test() throws Exception {
        String html = "<div class=\"foo\">this is some text</div> here is a <a href=\"#bar\">link</a> and another <a href=\"http://lucene.apache.org/\">link</a>. This is an entity: &amp; plus a &lt;.  Here is an &. <!-- is a comment -->";
        String gold = "\nthis is some text\n here is a link and another link. This is an entity: & plus a <.  Here is an &. ";
        assertHTMLStripsTo(html, gold, null);
    }

    /**
     * Filters the {@code htmlStripReaderTest.html} classpath resource and spot-checks the result:
     * no leftover {@code &lt;} entity, no "forrest"/"Forrest" text, and the expected first and
     * last words once surrounding whitespace is trimmed.
     *
     * @throws Exception if the resource cannot be read
     */
    public void testHTML() throws Exception {
        StringBuilder stripped = new StringBuilder();
        // try-with-resources so the resource stream is released even when reading fails.
        try (Reader source = new InputStreamReader(getClass().getResourceAsStream("htmlStripReaderTest.html"), StandardCharsets.UTF_8)) {
            HTMLStripCharFilter filter = new HTMLStripCharFilter(source);
            int ch;
            while ((ch = filter.read()) != -1) {
                stripped.append((char) ch);
            }
        }
        String text = stripped.toString();
        String trimmed = text.trim();
        assertTrue("Entity not properly escaped", text.indexOf("&lt;") == -1);
        assertTrue("Forrest should have been stripped out", text.indexOf("forrest") == -1 && text.indexOf("Forrest") == -1);
        assertTrue("File should start with 'Welcome to Solr' after trimming", trimmed.startsWith("Welcome to Solr"));
        // Message previously said "start with" although the check is endsWith.
        assertTrue("File should end with 'Foundation.' after trimming", trimmed.endsWith("Foundation."));
    }

    /**
     * Filters the {@code MS-Word 14 generated.htm} classpath resource and expects the trimmed
     * output to be exactly {@code "This is a test"}.
     *
     * @throws Exception if the resource cannot be read
     */
    public void testMSWord14GeneratedHTML() throws Exception {
        // try-with-resources so the resource stream is released even when reading or the assertion fails.
        try (Reader source = new InputStreamReader(getClass().getResourceAsStream("MS-Word 14 generated.htm"), StandardCharsets.UTF_8)) {
            HTMLStripCharFilter filter = new HTMLStripCharFilter(source);
            StringBuilder stripped = new StringBuilder();
            int ch;
            while ((ch = filter.read()) != -1) {
                stripped.append((char) ch);
            }
            String actual = stripped.toString().trim();
            assertEquals("'" + actual + "' is not equal to 'This is a test'", "This is a test", actual);
        }
    }

    /** Checks that {@code &Gamma;} is decoded to the Greek capital gamma when a tag set is supplied. */
    public void testGamma() throws Exception {
        // Typed set instead of a raw HashSet: avoids the unchecked conversion to Set<String>.
        Set<String> tags = new HashSet<>(Arrays.asList("reserved"));
        assertHTMLStripsTo("&Gamma;", "Γ", tags);
    }

    /** Checks decoding of a mix of named, decimal and hexadecimal character entities. */
    public void testEntities() throws Exception {
        // Typed set instead of a raw HashSet: avoids the unchecked conversion to Set<String>.
        Set<String> tags = new HashSet<>(Arrays.asList("reserved"));
        assertHTMLStripsTo("&nbsp; &lt;foo&gt; &Uuml;bermensch &#61; &Gamma; bar &#x393;", "  <foo> Übermensch = Γ bar Γ", tags);
    }

    /** Checks decoding of further named and decimal character entities. */
    public void testMoreEntities() throws Exception {
        // Typed set instead of a raw HashSet: avoids the unchecked conversion to Set<String>.
        Set<String> tags = new HashSet<>(Arrays.asList("reserved"));
        assertHTMLStripsTo("&nbsp; &lt;junk/&gt; &nbsp; &#33; &#64; and &#8217;", "  <junk/>   ! @ and ’", tags);
    }

    /**
     * Passes {@code "reserved"} as the tag set and checks, by character position in the output,
     * that all three {@code reserved} tags survive while the {@code other} tag is removed.
     */
    public void testReserved() throws Exception {
        // Typed set instead of a raw HashSet: avoids the unchecked conversion to Set<String>.
        Set<String> escapedTags = new HashSet<>();
        escapedTags.add("reserved");
        String html = "aaa bbb <reserved ccc=\"ddddd\"> eeee </reserved> ffff <reserved ggg=\"hhhh\"/> <other/>";
        HTMLStripCharFilter filter = new HTMLStripCharFilter(new StringReader(html), escapedTags);
        StringBuilder stripped = new StringBuilder();
        int ch;
        while ((ch = filter.read()) != -1) {
            stripped.append((char) ch);
        }
        String result = stripped.toString();
        // The expected indexes are the positions of "reserved" in the input, i.e. the kept tags are not shifted.
        assertTrue("Escaped tag not preserved: " + result.indexOf("reserved"), result.indexOf("reserved") == 9);
        assertTrue("Escaped tag not preserved: " + result.indexOf("reserved", 15), result.indexOf("reserved", 15) == 38);
        assertTrue("Escaped tag not preserved: " + result.indexOf("reserved", 41), result.indexOf("reserved", 41) == 54);
        assertTrue("Other tag should be removed", result.indexOf("other") == -1);
    }

    /**
     * Table-driven test over malformed HTML fragments (unbalanced quotes, stray angle brackets,
     * unterminated tags, conditional-comment style markup).
     *
     * <p>The array is a flat list of pairs: each even index holds the input and the following odd
     * index holds the text expected after filtering with no tag set.
     *
     * <p>NOTE(review): a few literals below continue across physical lines in this copy. Presumably
     * the literal embeds a separator-like character that is shown here as a line break — verify
     * against the upstream source before editing these entries.
     */
    public void testMalformedHTML() throws Exception {
        String[] strArr = {"a <a hr<ef=aa<a>> </close</a>", "a <a hr<ef=aa> </close", "<a href=http://dmoz.org/cgi-bin/add.cgi?where=/arts/\" class=lu style=\"font-size: 9px\" target=dmoz>Submit a Site</a>", "Submit a Site", "<a href=javascript:ioSwitch('p8','http://www.csmonitor.com/') title=expand id=e8 class=expanded rel=http://www.csmonitor.com/>Christian Science", "Christian Science", "<link rel=\"alternate\" type=\"application/rss+xml\" title=\"San Francisco \" 2008 RSS Feed\" href=\"http://2008.sf.wordcamp.org/feed/\" />", "\n", "<a href=\" http://www.surgery4was.happyhost.org/video-of-arthroscopic-knee-surgery symptoms.html, heat congestive heart failure <a href=\" http://www.symptoms1bad.happyhost.org/canine", "<a href=\" http://www.surgery4was.happyhost.org/video-of-arthroscopic-knee-surgery symptoms.html, heat congestive heart failure <a href=\" http://www.symptoms1bad.happyhost.org/canine", "<a href=\"http://ucblibraries.colorado.edu/how/index.htm\"class=\"pageNavAreaText\">", "", "<link title=\"^\\\" 21Sta's Blog\" rel=\"search\"  type=\"application/opensearchdescription+xml\"  href=\"http://21sta.com/blog/inc/opensearch.php\" />", "\n", "<a href=\"#postcomment\" title=\"\"Leave a comment\";\">?", "?", "<a href='/modern-furniture'   ' id='21txt' class='offtab'   onMouseout=\"this.className='offtab';  return true;\" onMouseover=\"this.className='ontab';  return true;\">", "", "<a href='http://alievi.wordpress.com/category/01-todos-posts/' style='font-size: 275%; padding: 1px; margin: 1px;' title='01 - Todos Post's (83)'>", "", "The <a href=<a href=\"http://www.advancedmd.com>medical\">http://www.advancedmd.com>medical</a> practice software</a>", "The <a href=medical\">http://www.advancedmd.com>medical practice software", "<a href=\"node/21426\" class=\"clipTitle2\" title=\"Levi.com/BMX 2008 Clip of the Week 29 \"Morgan Wade Leftover Clips\"\">Levi.com/BMX 2008 Clip of the Week 29...", "Levi.com/BMX 2008 Clip of the Week 29...", "<a 
href=\"printer_friendly.php?branch=&year=&submit=go&screen=\";\">Printer Friendly", "Printer Friendly", "<a href=#\" ondragstart=\"return false\" onclick=\"window.external.AddFavorite('http://www.amazingtextures.com', 'Amazing Textures');return false\" onmouseover=\"window.status='Add to Favorites';return true\">Add to Favorites", "Add to Favorites", "<a href=\"../at_home/at_home_search.html\"../_home/at_home_search.html\">At", "At", "E-mail: <a href=\"\"mailto:XXXXXX@example.com\" \">XXXXXX@example.com </a>", "E-mail: XXXXXX@example.com ", "<li class=\"farsi\"><a title=\"A'13?\" alt=\"A'13?\" href=\"http://www.america.gov/persian\" alt=\"\" name=\"A'13?\"A'13? title=\"A'13?\">A'13?</a></li>", "\nA'13?\n", "<li><a href=\"#28\" title=\"Hubert \"Geese\" Ausby\">Hubert \"Geese\" Ausby</a></li>", "\nHubert \"Geese\" Ausby\n", "<href=\"http://anbportal.com/mms/login.asp\">", "\n", "<a href=\"", "<a href=\"", "<a href=\">", "", "<a rel=\"nofollow\" href=\"http://anissanina31.skyrock.com/1895039493-Hi-tout-le-monde.html\" title=\" Hi, tout le monde !>#</a>", "#", "<a href=\"http://annunciharleydavidsonusate.myblog.it/\" title=\"Annunci Moto e Accessori Harley Davidson\" target=\"_blank\"><img src=\"http://annunciharleydavidsonusate.myblog.it/images/Antipixel.gif\" /></a>", "", "<a href=\"video/addvideo&v=120838887181\" onClick=\"return confirm('Are you sure you want  add this video to your profile? 
If it exists some video in your profile will be overlapped by this video!!')\" \" onmouseover=\"this.className='border2'\" onmouseout=\"this.className=''\">", "", "<a href=#Services & Support>", "", "<input type=\"image\" src=\"http://apologyindex.com/ThemeFiles/83401-72905/images/btn_search.gif\"value=\"Search\" name=\"Search\" alt=\"Search\" class=\"searchimage\" onclick=\"incom ='&sc=' + document.getElementById('sel').value ; var dt ='&dt=' + document.getElementById('dt').value; var searchKeyword = document.getElementById('q').value ; searchKeyword = searchKeyword.replace(/\\s/g,''); if (searchKeyword.length < 3){alert('Nothing to search. Search keyword should contain atleast 3 chars.'); return false; } var al='&al=' +  document.getElementById('advancedlink').style.display ;  document.location.href='http://apologyindex.com/search.aspx?q=' + document.getElementById('q').value + incom + dt + al;\" />", "", "<input type=\"image\" src=\"images/afbe.gif\" width=\"22\" height=\"22\"  hspace=\"4\" title=\"Add to Favorite\" alt=\"Add to Favorite\"onClick=\" if(window.sidebar){ window.sidebar.addPanel(document.title,location.href,''); }else if(window.external){ window.external.AddFavorite(location.href,document.title); }else if(window.opera&&window.print) { return true; }\">", "", "<area shape=\"rect\" coords=\"12,153,115,305\" href=\"http://statenislandtalk.com/v-web/gallery/Osmundsen-family\"Art's Norwegian Roots in Rogaland\">", "\n", "<a rel=\"nofollow\" href=\"http://arth26.skyrock.com/660188240-bonzai.html\" title=\"bonza>#", "#", "<a href=  >", "", "<ahref=http:..", "<ahref=http:..", "<ahref=http:..>", "\n", "<ahref=\"http://aseigo.bddf.ca/cms/1025\">A", "\nA", "<a href=\"javascript:calendar_window=window.open('/calendar.aspx?formname=frmCalendar.txtDate','calendar_window','width=154,height=188');calendar_window.focus()\">", "", "<a href=\"/applications/defenseaerospace/19+rackmounts\" title=\"19\" Rackmounts\">", "", "<a 
href=http://www.azimprimerie.fr/flash/backup/lewes-zip-code/savage-model-110-manual.html title=savage model 110 manual rel=dofollow>", "", "<a class=\"at\" name=\"Lamborghini  href=\"http://lamborghini.coolbegin.com\">Lamborghini /a>", "Lamborghini /a>", "<A href='newslink.php?news_link=http%3A%2F%2Fwww.worldnetdaily.com%2Findex.php%3Ffa%3DPAGE.view%26pageId%3D85729&news_title=Florida QB makes 'John 3:16' hottest Google search Tebow inscribed Bible reference on eye black for championship game' TARGET=_blank>", "", "<a href=/myspace !style='color:#993333'>", "", "<meta name=3DProgId content=3DExcel.Sheet>", "\n", "<link id=3D\"shLink\" href=3D\"PSABrKelly-BADMINTONCupResults08FINAL2008_09_19=_files/sheet004.htm\">", "\n", "<td bgcolor=3D\"#FFFFFF\" nowrap>", "\n", "<a href=\"http://basnect.info/usersearch/\"predicciones-mundiales-2009\".html\">\"predicciones mundiales 2009\"</a>", "\"predicciones mundiales 2009\"", "<a class=\"comment-link\" href=\"https://www.blogger.com/comment.g?blogID=19402125&postID=114070605958684588\"location.href=https://www.blogger.com/comment.g?blogID=19402125&postID=114070605958684588;>", "", "<a href = \"/videos/Bishop\"/\" title = \"click to see more Bishop\" videos\">Bishop\"</a>", "Bishop\"", "<a href=\"http://bhaa.ie/calendar/event.php?eid=20081203150127531\"\">BHAA Eircom 2 &amp; 5 miles CC combined start</a>", "BHAA Eircom 2 & 5 miles CC combined start", "<a href=\"http://people.tribe.net/wolfmana\" onClick='setClick(\"Application[tribe].Person[bb7df210-9dc0-478c-917f-436b896bcb79]\")'\" title=\"Mana\">", "", "<a  href=\"http://blog.edu-cyberpg.com/ct.ashx?id=6143c528-080c-4bb2-b765-5ec56c8256d3&url=http%3a%2f%2fwww.gsa.ac.uk%2fmackintoshsketchbook%2f\"\" eudora=\"autourl\">", "", "<input type=\"text\" value=\"<search here>\">", "<input type=\"text\" value=\"\n\">", "<input type=\"text\" value=\"<search here\">", "<input type=\"text\" value=\"\n", "<input type=\"text\" value=\"search here>\">", "\">", "<input type=\"text\" 
value=\"&lt;search here&gt;\" onFocus=\"this.value='<search here>'\">", "", "<![if ! IE]>\n<link href=\"http://i.deviantart.com/icons/favicon.png\" rel=\"shortcut icon\"/>\n<![endif]>", "\n\n\n", "<![if supportMisalignedColumns]>\n<tr height=0 style='display:none'>\n<td width=64 style='width:48pt'></td>\n</tr>\n<![endif]>", "\n\n\n\n\n\n\n\n"};
        // Step by two: strArr[i] is the input, strArr[i + 1] the expected filtered text.
        for (int i = 0; i < strArr.length; i += 2) {
            assertHTMLStripsTo(strArr[i], strArr[i + 1], (Set<String>) null);
        }
    }

    /**
     * Feeds inputs longer than {@link HTMLStripCharFilter#getInitialBufferSize()} through the
     * filter: one passthrough text, then an oversized comment, processing instruction and tag.
     */
    public void testBufferOverflow() throws Exception {
        StringBuilder buffer = new StringBuilder(HTMLStripCharFilter.getInitialBufferSize() + 50);

        // Case 1: text that is expected to come out unchanged, even though the reader handed to
        // the assertion helper is itself already an HTMLStripCharFilter.
        buffer.append("ah<?> ??????");
        appendChars(buffer, HTMLStripCharFilter.getInitialBufferSize() + 500);
        Reader alreadyFiltered = new HTMLStripCharFilter(new BufferedReader(new StringReader(buffer.toString())));
        String unchanged = buffer.toString();
        assertHTMLStripsTo(alreadyFiltered, unchanged, null);

        // Case 2: a comment several buffer sizes long, followed by plain text.
        buffer.setLength(0);
        buffer.append("<!--");
        appendChars(buffer, (3 * HTMLStripCharFilter.getInitialBufferSize()) + 500);
        buffer.append("-->foo");
        String longComment = buffer.toString();
        assertHTMLStripsTo(longComment, "foo", null);

        // Case 3: an oversized processing instruction.
        buffer.setLength(0);
        buffer.append("<?");
        appendChars(buffer, HTMLStripCharFilter.getInitialBufferSize() + 500);
        buffer.append("?>");
        String longInstruction = buffer.toString();
        assertHTMLStripsTo(longInstruction, "", null);

        // Case 4: an oversized self-closing tag.
        buffer.setLength(0);
        buffer.append("<b ");
        appendChars(buffer, HTMLStripCharFilter.getInitialBufferSize() + 500);
        buffer.append("/>");
        String longTag = buffer.toString();
        assertHTMLStripsTo(longTag, "", null);
    }

    /**
     * Appends {@code i / 2} copies of the two-character sequence {@code "a "} to {@code sb},
     * i.e. roughly {@code i} characters of filler (one fewer when {@code i} is odd).
     *
     * @param sb builder to append to
     * @param i approximate number of characters to add
     */
    private void appendChars(StringBuilder sb, int i) {
        int remainingPairs = i / 2;
        while (remainingPairs-- > 0) {
            sb.append('a');
            sb.append(' ');
        }
    }

    /**
     * Checks comment handling: a three-dash comment, a {@code <! -- ... >} construct, and an
     * unterminated comment of random length, which is expected to produce no output.
     */
    public void testComment() throws Exception {
        assertHTMLStripsTo("<!--- three dashes, still a valid comment ---> ", " ", null);
        assertHTMLStripsTo("<! -- blah > ", " ", null);

        StringBuilder unterminated = new StringBuilder("<!--");
        int fillerLength = TestUtil.nextInt(random(), 0, 1000);
        appendChars(unterminated, fillerLength);
        assertHTMLStripsTo(unterminated.toString(), "", null);
    }

    /**
     * Reads {@code str} through the filter and, for every {@code 'X'} that comes out, asserts that
     * the corrected offset of that output position equals the position of the next {@code 'X'}
     * in {@code str}.
     *
     * @param str input whose {@code 'X'} characters act as offset markers
     */
    public void doTestOffsets(String str) throws Exception {
        HTMLStripCharFilter filter = new HTMLStripCharFilter(new BufferedReader(new StringReader(str)));
        int markerInInput = -1;
        int ch;
        for (int outputOffset = 0; (ch = filter.read()) != -1; outputOffset++) {
            // Offset correction is requested for every output character, marker or not.
            int corrected = filter.correctOffset(outputOffset);
            if (ch != 'X') {
                continue;
            }
            markerInInput = str.indexOf('X', markerInInput + 1);
            assertEquals(markerInInput, corrected);
        }
    }

    /** Runs {@link #doTestOffsets(String)} over inputs mixing tags, entities and stray markup. */
    public void testOffsets() throws Exception {
        String[] inputs = {
            "hello <p> X<p> how <p>X are you",
            "X &amp; X &#40; X &lt; &gt; X",
            "X < &zz >X &# < X > < &l > &g < X",
        };
        for (String input : inputs) {
            doTestOffsets(input);
        }
    }

    /**
     * Reads {@code str} through the filter and asserts that the corrected offset of every output
     * position is no greater than the length of {@code str}.
     *
     * @param str input document
     */
    static void assertLegalOffsets(String str) throws Exception {
        final int docLength = str.length();
        HTMLStripCharFilter filter = new HTMLStripCharFilter(new BufferedReader(new StringReader(str)));
        for (int outputOffset = 0; filter.read() != -1; outputOffset++) {
            int corrected = filter.correctOffset(outputOffset);
            String failure = "invalid offset correction: " + outputOffset + "->" + corrected + " for doc of length: " + docLength;
            assertTrue(failure, corrected <= docLength);
        }
    }

    /** Runs {@link #assertLegalOffsets(String)} on plain text and on text with a truncated numeric entity. */
    public void testLegalOffsets() throws Exception {
        for (String doc : new String[] {"hello world", "hello &#x world"}) {
            assertLegalOffsets(doc);
        }
    }

    /** Runs {@code checkRandomData} against the test analyzer for {@code RANDOM_MULTIPLIER * 1000} iterations. */
    public void testRandom() throws Exception {
        int iterations = RANDOM_MULTIPLIER * 1000;
        Analyzer analyzer = newTestAnalyzer();
        try {
            checkRandomData(random(), analyzer, iterations);
        } finally {
            // Close the analyzer even when the random check fails.
            analyzer.close();
        }
    }

    /**
     * Runs {@code checkRandomData} against the test analyzer for {@code RANDOM_MULTIPLIER * 100}
     * iterations, passing 8192 as the additional size argument.
     */
    public void testRandomHugeStrings() throws Exception {
        int iterations = RANDOM_MULTIPLIER * 100;
        Analyzer analyzer = newTestAnalyzer();
        try {
            checkRandomData(random(), analyzer, iterations, 8192);
        } finally {
            // Close the analyzer even when the random check fails.
            analyzer.close();
        }
    }

    /** Checks analysis consistency on a fixed input containing a closing {@code </br>} tag. */
    public void testCloseBR() throws Exception {
        Analyzer analyzer = newTestAnalyzer();
        try {
            checkAnalysisConsistency(random(), analyzer, random().nextBoolean(), " Secretary)</br> [[M");
        } finally {
            // Close the analyzer even when the consistency check fails.
            analyzer.close();
        }
    }

    /** Checks inputs with server-side-include style comments nested in attribute values and in a script. */
    public void testServerSideIncludes() throws Exception {
        String includesInAttributes = "one<img src=\"image.png\"\n alt =  \"Alt: <!--#echo var='${IMAGE_CAPTION:<!--comment-->\\'Comment\\'}'  -->\"\n\n title=\"Title: <!--#echo var=\"IMAGE_CAPTION\"-->\">two";
        assertHTMLStripsTo(includesInAttributes, "onetwo", null);

        String includeInScript = "one<script><!-- <!--#config comment=\"<!-- \\\"comment\\\"-->\"--> --></script>two";
        assertHTMLStripsTo(includeInScript, "one\ntwo", null);
    }

    /** Checks script blocks whose quoted strings contain comment markers and a closing script tag. */
    public void testScriptQuotes() throws Exception {
        String quotedCommentMarkers = "one<script attr= bare><!-- action('<!-- comment -->', \"\\\"-->\\\"\"); --></script>two";
        assertHTMLStripsTo(quotedCommentMarkers, "one\ntwo", null);

        String quotedClosingTag = "hello<script><!-- f('<!--internal--></script>'); --></script>";
        assertHTMLStripsTo(quotedClosingTag, "hello\n", null);
    }

    /**
     * With {@code "SCRIPT"} in the tag set, the expected output keeps the opening and closing
     * script tags but drops the script body.
     */
    public void testEscapeScript() throws Exception {
        // Typed set instead of a raw HashSet: avoids the unchecked conversion to Set<String>.
        Set<String> escapedTags = new HashSet<>(Arrays.asList("SCRIPT"));
        assertHTMLStripsTo("one<script no-value-attr>callSomeMethod();</script>two", "one<script no-value-attr></script>two", escapedTags);
    }

    /** Checks that a style element and its comment-wrapped content are reduced to a single newline. */
    public void testStyle() throws Exception {
        String html = "one<style type=\"text/css\">\n<!--\n@import url('http://www.lasletrasdecanciones.com/css.css');\n-->\n</style>two";
        String gold = "one\ntwo";
        assertHTMLStripsTo(html, gold, null);
    }

    /**
     * With {@code "STYLE"} in the tag set, the expected output keeps the opening and closing
     * style tags but drops the style sheet body.
     */
    public void testEscapeStyle() throws Exception {
        // Typed set instead of a raw HashSet: avoids the unchecked conversion to Set<String>.
        Set<String> escapedTags = new HashSet<>(Arrays.asList("STYLE"));
        assertHTMLStripsTo("one<style type=\"text/css\"> body,font,a { font-family:arial; } </style>two", "one<style type=\"text/css\"></style>two", escapedTags);
    }

    /** Checks that opening, self-closing and closing BR tags (any case, with attributes) each become a newline. */
    public void testBR() throws Exception {
        assertHTMLStripsTo("one<BR />two<br>three", "one\ntwo\nthree", null);
        assertHTMLStripsTo("one<BR some stuff here too>two</BR>", "one\ntwo\n", null);
    }

    /** With {@code "BR"} in the tag set, the input is expected to pass through unchanged. */
    public void testEscapeBR() throws Exception {
        // Typed set instead of a raw HashSet: avoids the unchecked conversion to Set<String>.
        Set<String> escapedTags = new HashSet<>(Arrays.asList("BR"));
        String html = "one<BR class='whatever'>two</\nBR\n>";
        assertHTMLStripsTo(html, html, escapedTags);
    }

    /** Checks that span and sup tags are removed without inserting any replacement characters. */
    public void testInlineTagsNoSpace() throws Exception {
        String html = "one<sPAn class=\"invisible\">two<sup>2<sup>e</sup></sup>.</SpaN>three";
        String gold = "onetwo2e.three";
        assertHTMLStripsTo(html, gold, null);
    }

    /**
     * Table-driven CDATA test. The array is a flat list of pairs: even index = input, following
     * odd index = expected filtered text. Two entries are built from random HTML-ish prefixes
     * placed after {@code <!}: a closed one (expected to vanish) and an unclosed one (expected to
     * pass through unchanged).
     */
    public void testCDATA() throws Exception {
        // Random prefixes have every '>' replaced by a space and a leading "--" replaced by "__",
        // so they never close the construct and never start a "<!--" comment.
        String closedPrefix = TestUtil.randomHtmlishString(random(), 100).replaceAll(">", " ").replaceFirst("^--", "__");
        String closedAngleBangNonCdata = "<!" + closedPrefix + "-[CDATA[&]]>";
        // Previously this second random string was computed and thrown away, and the unclosed
        // case reused the first prefix; it now uses its own random prefix.
        String unclosedPrefix = TestUtil.randomHtmlishString(random(), 100).replaceAll(">", " ").replaceFirst("^--", "__");
        String unclosedAngleBangNonCdata = "<!" + unclosedPrefix + "-[CDATA[";
        String[] strArr = {"one<![CDATA[<one><two>three<four></four></two></one>]]>two", "one<one><two>three<four></four></two></one>two", "one<![CDATA[two<![CDATA[three]]]]><![CDATA[>four]]>five", "onetwo<![CDATA[three]]>fourfive", "<! [CDATA[&]]>", "", "<! [CDATA[&] ] >", "", "<! [CDATA[&]]", "<! [CDATA[&]]", "<!\u2009[CDATA[&]]>", "", "<!\u2009[CDATA[&]\u2009]\u2009>", "", "<!\u2009[CDATA[&]\u2009]\u2009", "<!\u2009[CDATA[&]\u2009]\u2009", closedAngleBangNonCdata, "", "<![CDATA[", "", "<![CDATA[<br>", "<br>", "<![CDATA[<br>]]", "<br>]]", "<![CDATA[<br>]]>", "<br>", "<![CDATA[<br>] ] >", "<br>] ] >", "<![CDATA[<br>]\u2009]\u2009>", "<br>]\u2009]\u2009>", "<!\u2009[CDATA[", "<!\u2009[CDATA[", unclosedAngleBangNonCdata, unclosedAngleBangNonCdata};
        // Step by two: strArr[i] is the input, strArr[i + 1] the expected filtered text.
        for (int i = 0; i < strArr.length; i += 2) {
            assertHTMLStripsTo(strArr[i], strArr[i + 1], null);
        }
    }

    /** Checks that an unterminated {@code <![endif]} construct is passed through unchanged. */
    public void testUnclosedAngleBang() throws Exception {
        String unclosed = "<![endif]";
        assertHTMLStripsTo(unclosed, "<![endif]", null);
    }

    /** Checks that the all-uppercase entity names QUOT, COPY, GT, LT, REG and AMP are decoded. */
    public void testUppercaseCharacterEntityVariants() throws Exception {
        String html = " &QUOT;-&COPY;&GT;>&LT;<&REG;&AMP;";
        String gold = " \"-©>><<®&";
        assertHTMLStripsTo(html, gold, null);
    }

    /** Checks that a processing instruction terminated by {@code />} instead of {@code ?>} is still removed. */
    public void testMSWordMalformedProcessingInstruction() throws Exception {
        String html = "one<?xml:namespace prefix = o ns = \"urn:schemas-microsoft-com:office:office\" />two";
        String gold = "onetwo";
        assertHTMLStripsTo(html, gold, null);
    }

    /**
     * Checks tags whose names are made of non-ASCII characters: each such tag is expected to be
     * replaced by a newline while the same characters in plain text are kept.
     *
     * <p>NOTE(review): the U+FFFD replacement characters in both literals look like supplementary
     * (non-BMP) characters that were lost when this file was extracted, which would match the
     * test name — restore them from the upstream source; as written the test does not exercise
     * supplementary characters.
     */
    public void testSupplementaryCharsInTags() throws Exception {
        assertHTMLStripsTo("one<��艱鍟䇹愯瀛>two<瀛愯��>three 瀛愯��</瀛愯��>four</��艱鍟䇹愯瀛>five<����>six<����/>seven", "one\ntwo\nthree 瀛愯��\nfour\nfive\nsix\nseven", (Set<String>) null);
    }

    /** Checks analysis consistency on one random HTML-ish string (generator size argument 10000). */
    public void testRandomBrokenHTML() throws Exception {
        String brokenHtml = TestUtil.randomHtmlishString(random(), 10000);
        Analyzer analyzer = newTestAnalyzer();
        try {
            checkAnalysisConsistency(random(), analyzer, random().nextBoolean(), brokenHtml);
        } finally {
            // Close the analyzer even when the consistency check fails.
            analyzer.close();
        }
    }

    /**
     * Builds a space-separated text of 10 to 10000 random words (arbitrary Unicode, "realistic"
     * Unicode, or simple strings, chosen at random) and reads it through the filter to the end.
     * There are no assertions; the test passes when reading completes without an exception.
     */
    public void testRandomText() throws Exception {
        StringBuilder text = new StringBuilder();
        int numWords = TestUtil.nextInt(random(), 10, 10000);
        switch (TestUtil.nextInt(random(), 0, 4)) {
            case 0:
                for (int word = 0; word < numWords; word++) {
                    text.append(TestUtil.randomUnicodeString(random(), 20));
                    text.append(' ');
                }
                break;
            case 1:
                for (int word = 0; word < numWords; word++) {
                    text.append(TestUtil.randomRealisticUnicodeString(random(), 3, 20));
                    text.append(' ');
                }
                break;
            default:
                // Values 2..4 all land here, so simple strings are the most likely choice.
                for (int word = 0; word < numWords; word++) {
                    text.append(TestUtil.randomSimpleString(random()));
                    text.append(' ');
                }
                break;
        }
        // Create the filter once and drain it. The previous loop built a new filter on every
        // iteration, so it only ever read the first character and could not reach end of input.
        HTMLStripCharFilter filter = new HTMLStripCharFilter(new StringReader(text.toString()));
        while (filter.read() != -1) {
            // Intentionally empty: only exhausting the reader matters.
        }
    }

    /**
     * Checks numeric character entities that name UTF-16 surrogate code units.
     *
     * <p>Expected tokens, as encoded in the assertions below: a high-surrogate entity immediately
     * followed by a low-surrogate entity yields the surrogate pair; a surrogate entity without a
     * matching partner yields U+FFFD; an unterminated surrogate entity directly followed by a tag
     * is left as literal text.
     *
     * <p>Expected strings use Unicode escapes. The paired cases previously contained two U+FFFD
     * characters where the pair had been lost; they are restored from the code units named by the
     * entities in the corresponding input (0xD86C = 55404, 0xDC01 = 56321).
     */
    public void testUTF16Surrogates() throws Exception {
        Analyzer analyzer = newTestAnalyzer();
        try {
            // Paired surrogates, hex and decimal in all four combinations.
            assertAnalyzesTo(analyzer, " one two &#xD86C;&#XdC01;three", new String[]{"one", "two", "\uD86C\uDC01three"});
            assertAnalyzesTo(analyzer, " &#55404;&#XdC01;", new String[]{"\uD86C\uDC01"});
            assertAnalyzesTo(analyzer, " &#xD86C;&#56321;", new String[]{"\uD86C\uDC01"});
            assertAnalyzesTo(analyzer, " &#55404;&#56321;", new String[]{"\uD86C\uDC01"});

            // High surrogate followed by a non-surrogate (U+E28F = 57999).
            assertAnalyzesTo(analyzer, " &#55404;&#57999;", new String[]{"\uFFFD\uE28F"});
            assertAnalyzesTo(analyzer, " &#xD86C;&#57999;", new String[]{"\uFFFD\uE28F"});

            // Non-surrogate (U+D6DA = 55002) followed by a lone low surrogate.
            assertAnalyzesTo(analyzer, " &#55002;&#XdC01;", new String[]{"\uD6DA\uFFFD"});
            assertAnalyzesTo(analyzer, " &#55002;&#56321;", new String[]{"\uD6DA\uFFFD"});

            // Lone high surrogate, hex then decimal: terminated, unterminated, and followed by a tag.
            assertAnalyzesTo(analyzer, " &#Xd921;", new String[]{"\uFFFD"});
            assertAnalyzesTo(analyzer, " &#Xd921", new String[]{"\uFFFD"});
            assertAnalyzesTo(analyzer, " &#Xd921<br>", new String[]{"&#Xd921"});
            assertAnalyzesTo(analyzer, " &#55528;", new String[]{"\uFFFD"});
            assertAnalyzesTo(analyzer, " &#55528", new String[]{"\uFFFD"});
            assertAnalyzesTo(analyzer, " &#55528<br>", new String[]{"&#55528"});

            // Lone low surrogate, hex then decimal: terminated, unterminated, and followed by a tag.
            assertAnalyzesTo(analyzer, " &#xdfdb;", new String[]{"\uFFFD"});
            assertAnalyzesTo(analyzer, " &#xdfdb", new String[]{"\uFFFD"});
            assertAnalyzesTo(analyzer, " &#xdfdb<br>", new String[]{"&#xdfdb"});
            assertAnalyzesTo(analyzer, " &#57209;", new String[]{"\uFFFD"});
            assertAnalyzesTo(analyzer, " &#57209", new String[]{"\uFFFD"});
            assertAnalyzesTo(analyzer, " &#57209<br>", new String[]{"&#57209"});
        } finally {
            // Close the analyzer even when one of the assertions fails.
            analyzer.close();
        }
    }

    /**
     * String convenience overload: wraps {@code str} in a {@link StringReader} and delegates to
     * {@link #assertHTMLStripsTo(Reader, String, Set)}.
     *
     * @param str HTML input
     * @param str2 expected filtered text
     * @param set tag set handed to the filter, or {@code null} for none
     */
    public static void assertHTMLStripsTo(String str, String str2, Set<String> set) throws Exception {
        Reader input = new StringReader(str);
        assertHTMLStripsTo(input, str2, set);
    }

    /**
     * Reads {@code reader} through an {@link HTMLStripCharFilter} until end of input and asserts
     * that the collected text equals {@code str}.
     *
     * <p>If reading throws before the collected text matches, the exception is wrapped in a new
     * {@link Exception} whose message also shows the partial output; if the text already matches,
     * the original exception is rethrown unchanged.
     *
     * @param reader HTML input
     * @param str expected filtered text
     * @param set tag set handed to the filter's two-argument constructor; when {@code null} the
     *     single-argument constructor is used instead
     */
    public static void assertHTMLStripsTo(Reader reader, String str, Set<String> set) throws Exception {
        HTMLStripCharFilter filter;
        if (set == null) {
            filter = new HTMLStripCharFilter(reader);
        } else {
            filter = new HTMLStripCharFilter(reader, set);
        }
        StringBuilder actual = new StringBuilder();
        try {
            for (int ch = filter.read(); ch != -1; ch = filter.read()) {
                actual.append((char) ch);
            }
            assertEquals("'" + actual.toString() + "' is not equal to '" + str + "'", str, actual.toString());
        } catch (Exception e) {
            if (str.equals(actual.toString())) {
                throw e;
            }
            throw new Exception("('" + actual.toString() + "' is not equal to '" + str + "').  " + e.getMessage(), e);
        }
    }
}
