
    ,g                         d dl mZ d dlmZ d dlmZ ddlmZm	Z	m
Z
mZmZmZmZmZmZ ddlmZ  e ee            Zd Zd Z G d	 d
      Zy)    )bisect_left)deque)html5   )	EOFTokenascii_lettersascii_upper_to_lowerdigits	hexdigitsreplacement_charactersspace_characterstag_token_types)HTMLInputStreamc                     | t         v ryt        t        |       x}t        t               k(  ryt        |   j	                  |       S )NTF)entitiesr   entity_keyslen
startswithprefixis     k/home/viktor/gitlab-persoonlijk/factuur-applicatie/venv/lib/python3.12/site-packages/tinyhtml5/tokenizer.pyhas_keys_with_prefixr      s=    f--#h-?q>$$V,,    c                     | t         v r| S t        dt        |       dz         D ]  }| d |  t         v s| d |  c S  t        |       )Nr   )r   ranger   KeyErrorr   s     r   longest_prefixr      sX    1c&kAo& #A2;("#A2; 6
r   c                      e Zd ZdZdNdZd Zd Zd Zd ZdOdZ	d	 Z
d
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d  Z!d! Z"d" Z#d# Z$d$ Z%d% Z&d& Z'd' Z(d( Z)d) Z*d* Z+d+ Z,d, Z-d- Z.d. Z/d/ Z0d0 Z1d1 Z2d2 Z3d3 Z4d4 Z5d5 Z6d6 Z7d7 Z8d8 Z9d9 Z:d: Z;d; Z<d< Z=d= Z>d> Z?d? Z@d@ ZAdA ZBdB ZCdC ZDdD ZEdE ZFdF ZGdG ZHdH ZIdI ZJdJ ZKdK ZLdL ZMdM ZNy)PHTMLTokenizerzHTML tokenizer.Nc                 d    t        |fi || _        || _        | j                  | _        d | _        y N)r   streamparser
data_statestatecurrent_token)selfr$   r%   kwargss       r   __init__zHTMLTokenizer.__init__)   s/    %f77 __
!r   c              #     K   t        g       | _        | j                         r| j                  j                  rOt
        j                  | j                  j                  j                  d      d | j                  j                  rO| j                  r)| j                  j                          | j                  r)| j                         ryyw)zThis is where the magic happens.

        We do our usually processing through the states and when we have a token
        to return we yield the token which pauses processing until the next token
        is requested.

        r   typedataN)	r   token_queuer'   r$   errorsr   PARSE_ERRORpoppopleftr)   s    r   __iter__zHTMLTokenizer.__iter__1   s      !9 jjl++$$!-- KK..2215  ++$$
 ""&&..00 "" jjls   BC4C=CCc                 n    t         j                  |d}|r||d<   | j                  j                  |       y)z%Add a parse error to the token queue.r-   datavarsN)r   r2   r0   append)r)   _datar8   tokens       r   parse_errorzHTMLTokenizer.parse_errorE   s3    **E: (E*&r   c                 \    | j                   j                  t        j                  |d       y)z+Add a characters string to the token queue.r-   N)r0   r9   r   
CHARACTERS)r)   r:   s     r   
characterszHTMLTokenizer.charactersL   s!    )9)95 IJr   c                    |rt         nt        }|rdnd}g }| j                  j                         }||v r0|j	                  |       | j                  j                         }||v r0t        dj                  |      |      }|t        v rt        |   }| j                  d|       nd|cxk  rdk  sn |dkD  rd	}| j                  d|       nad
|cxk  rdk  s8n d|cxk  rdk  s+n d|cxk  rdk  sn d|cxk  rdk  sn |t        g d      v r| j                  d|       t        |      }|dk7  r,| j                  d       | j                  j                  |       |S )zReturn either U+FFFD or the character based on the representation.

        It also discards ";" if present. If not present self.parse_error is
        invoked.

           
    z$illegal-codepoint-for-numeric-entity)integeri   i      �r                  i  i  )#   i  i  i i i i i i i i i i i i i i i i i	 i	 i
 i
 i i i i i i i i i i i rE   ;z numeric-entity-without-semicolon)r   r   r$   	characterr9   intjoinr   r<   	frozensetchrunget)r)   is_hexallowedradixstackrN   rD   replacements           r   consume_number_entityz#HTMLTokenizer.consume_number_entityP   sj     &)6" KK))+	7"LL#--/I 7"
 bggene, ,,09KCWU)6)w/A"KCWU 7,f,7,f,7,f,7,f,9 &: ; ;   :G ! Mg,K ?@KKi(r   c                    d}| j                   j                         g}|d   t        ddgt        v xs |d uxr ||d   k(  }|r | j                   j	                  |d          nK|d   dk(  rd}|j                  | j                   j                                |d   dv r+d}|j                  | j                   j                                |d   |rt        nt        v r1| j                   j	                  |d          | j                  |      }n| j                  d	       | j                   j	                  |j                                dd
j                  |       }nR|d   t        urPt        d
j                  |            sn5|j                  | j                   j                                |d   t        urP	 t        d
j                  |d d             }|d   dk7  r| j                  d       t        |      }||   t        v xs ||   t        v xs ||   dk(  }	|d   dk7  rB|r@|	r>| j                   j	                  |j                                dd
j                  |       }nI| j                   j	                  |j                                t         |    d
j                  ||d         }	 |r| j$                  d   d   dxx   |z  cc<   y |t        v rdnd}
| j&                  j                  t(        |
   |d       y # t"        $ rQ | j                  d       | j                   j	                  |j                                dd
j                  |       }Y w xY w)N&r   <#F)xXTzexpected-numeric-entityrC   rM   znamed-entity-without-semicolon=zexpected-named-entityr/   r   SPACE_CHARACTERSr>   r-   )r$   rN   r   r   rS   r9   r   r   rY   r<   r3   rP   r   r   r   r	   r   r   r(   r0   r   )r)   rU   from_attributeoutputrW   rS   hexentity_nameentity_lengthallowed_characterr.   s              r   consume_entityzHTMLTokenizer.consume_entity   s   &&()!Hc3:)9:: :D 8Wa%8 	 KKeAh'1X_CLL..01RyJ&T[[2245 Ry#Y6:!!%),33C8   !:;!!%))+.RWWU^,- )3&+BGGEN;T[[2245 )3&X,RWWU3BZ-@A r?c)$$%EF #K 0-(M9 0-(F20-(C/ " r?c)nARKK%%eiik2  01FKK%%eiik2 ( 56rwwu]^?T7U6VWFv&r*1-7-)/3C)C%D##U4[&$IJ/  .  !89!!%))+.RWWU^,-.s   :L AMMc                 *    | j                  |d       y)z5Replace the need for entity_in_attribute_value_state.T)rU   rc   N)ri   )r)   rU   s     r   process_entity_in_attributez)HTMLTokenizer.process_entity_in_attribute   s    GDAr   c                    | j                   }|d   t        v r|d   j                  t              |d<   |d   t        j
                  k(  rC|d   }t        |      }t        |      t        |      kD  r|j                  |ddd          ||d<   |d   t        j                  k(  r,|d   r| j                  d       |d   r| j                  d       | j                  j                  |       | j                  | _        y)	zThis method is a generic handler for emitting the tags.

        It also sets the state to "data" because that's what's needed after a
        token has been emitted.

        r.   namer/   Nr^   zattributes-in-end-tagselfClosingzself-closing-flag-on-end-tag)r(   r   	translater
   r   	START_TAGdictr   updateEND_TAGr<   r0   r9   r&   r'   )r)   r;   rawr/   s       r   emit_current_tokenz HTMLTokenizer.emit_current_token   s     ""=O+!&M334HIE&MV}/FmCys8c$i'KKDbD	* $fV}-=$$%<='$$%CD&__
r   c                     | j                   j                         }|dk(  r| j                  | _        y|dk(  r| j                  | _        y|dk(  r#| j                  d       | j                  d       y|t        u ry|t        v rN| j                  j                  t        j                  || j                   j                  t        d      z   d       y| j                   j                  d      }| j                  ||z          y)	Nr[   r\    invalid-codepointFTr-   r[   r\   rw   )r$   rN   entity_data_stater'   tag_open_stater<   r?   r   r   r0   r9   r   rb   chars_untilr)   r/   r?   s      r   r&   zHTMLTokenizer.data_state   s    {{$$&3;//DJ, + S[,,DJ( ' X01OOH%" ! S[%% ##..t{{667GNN%   001EFJOOD:-.r   c                 F    | j                          | j                  | _        yNT)ri   r&   r'   r5   s    r   rz   zHTMLTokenizer.entity_data_state  s    __
r   c                     | j                   j                         }|dk(  r| j                  | _        y|dk(  r| j                  | _        y|t
        u ry|dk(  r#| j                  d       | j                  d       y|t        v rN| j                  j                  t        j                  || j                   j                  t        d      z   d       y| j                   j                  d	      }| j                  ||z          y)
Nr[   r\   Frw   rx   rF   Tr-   ry   )r$   rN   $character_reference_in_rc_data_stater'   rcdata_less_than_sign_stater   r<   r?   r   r0   r9   r   rb   r|   )r)   r/   charss      r   rcdata_statezHTMLTokenizer.rcdata_state  s    {{$$&3;BBDJ. - S[99DJ* ) S[X01OOH%  %% ##..t{{667GNN%   KK++,@AEOOD5L)r   c                 F    | j                          | j                  | _        yr   )ri   r   r'   r5   s    r   r   z2HTMLTokenizer.character_reference_in_rc_data_state(  s    &&
r   c                 &   | j                   j                         }|dk(  r| j                  | _        y|dk(  r#| j	                  d       | j                  d       y|t        u ry| j                   j                  d      }| j                  ||z          yNr\   rw   rx   rF   F)r\   rw   T)r$   rN   rawtext_less_than_sign_stater'   r<   r?   r   r|   r}   s      r   rawtext_statezHTMLTokenizer.rawtext_state-  s    {{$$&3;::DJ  X01OOH%  S[00AJOOD:-.r   c                 &   | j                   j                         }|dk(  r| j                  | _        y|dk(  r#| j	                  d       | j                  d       y|t        u ry| j                   j                  d      }| j                  ||z          yr   )r$   rN    script_data_less_than_sign_stater'   r<   r?   r   r|   r}   s      r   script_data_statezHTMLTokenizer.script_data_state;  s    {{$$&3;>>DJ  X01OOH%  S[00AJOOD:-.r   c                     | j                   j                         }|t        u ry|dk(  r#| j                  d       | j	                  d       y| j	                  || j                   j                  d      z          y)NFrw   rx   rF   T)r$   rN   r   r<   r?   r|   r)   r/   s     r   plaintext_statezHTMLTokenizer.plaintext_stateI  sk    {{$$&3;X01OOH%  OOD4;;#:#:8#DDEr   c                    | j                   j                         }|dk(  r| j                  | _        y|dk(  r| j                  | _        y|t
        v r-t        j                  |g ddd| _        | j                  | _        y|dk(  r4| j                  d       | j                  d       | j                  | _        y|dk(  r>| j                  d	       | j                   j                  |       | j                  | _        y| j                  d
       | j                  d       | j                   j                  |       | j                  | _        y)N!/F)r.   rm   r/   rn   selfClosingAcknowledged>z'expected-tag-name-but-got-right-bracketz<>?z'expected-tag-name-but-got-question-markzexpected-tag-namer\   T)r$   rN   markup_declaration_open_stater'   close_tag_open_stater	   r   rp   r(   tag_name_stater<   r?   r&   rS   bogus_comment_stater   s     r   r{   zHTMLTokenizer.tag_open_stateT  s4   {{$$&3;;;DJ< ; S[22DJ8 7 ]"$+0"D ,,DJ& % S[ FGOOD!DJ  S[ FGKKd#11DJ 	 01OOC KKd#DJr   c                    | j                   j                         }|t        v r,t        j                  |g dd| _        | j                  | _        y	|dk(  r#| j                  d       | j                  | _        y	|t        u r4| j                  d       | j                  d       | j                  | _        y	| j                  d|       | j                   j                  |       | j                  | _        y	)
NFr.   rm   r/   rn   r   z*expected-closing-tag-but-got-right-bracketz expected-closing-tag-but-got-eof</z!expected-closing-tag-but-got-charr/   T)r$   rN   r	   r   rs   r(   r   r'   r<   r&   r   r?   rS   r   r   s     r   r   z"HTMLTokenizer.close_tag_open_statew  s    {{$$&= $	"D ,,DJ  S[IJDJ  S[?@OOD!DJ  @tLKKd#11DJr   c                    | j                   j                         }|t        v r| j                  | _        y|dk(  r| j                          y|t        u r#| j                  d       | j                  | _        y|dk(  r| j                  | _        y|dk(  r)| j                  d       | j                  dxx   dz  cc<   y| j                  dxx   |z  cc<   y)	Nr   zeof-in-tag-namer   rw   rx   rm   rF   T)r$   rN   r   before_attribute_name_stater'   ru   r   r<   r&   self_closing_start_tag_stater(   r   s     r   r   zHTMLTokenizer.tag_name_state  s    {{$$&##99DJ  S[##%  S[./DJ  S[::DJ  X01v&(2&
  v&$.& r   c                     | j                   j                         }|dk(  rd| _        | j                  | _        y| j                  d       | j                   j                  |       | j                  | _        yNr   rC   r\   T)r$   rN   temporary_bufferrcdata_end_tag_open_stater'   r?   rS   r   r   s     r   r   z)HTMLTokenizer.rcdata_less_than_sign_state  se    {{$$&3;$&D!77DJ
  OOC KKd#**DJr   c                    | j                   j                         }|t        v r'| xj                  |z  c_        | j                  | _        y| j                  d       | j                   j                  |       | j                  | _        yNr   T)	r$   rN   r	   r   rcdata_end_tag_name_stater'   r?   rS   r   r   s     r   r   z'HTMLTokenizer.rcdata_end_tag_open_state  so    {{$$&= !!T)!77DJ
  OOD!KKd#**DJr   c                 (   | j                   xr8 | j                   d   j                         | j                  j                         k(  }| j                  j	                         }|t
        v r8|r6t        j                  | j                  g dd| _         | j                  | _	        y|dk(  r8|r6t        j                  | j                  g dd| _         | j                  | _	        y|dk(  rH|rFt        j                  | j                  g dd| _         | j                          | j                  | _	        y|t        v r| xj                  |z  c_        y| j                  d| j                          | j                  j                  |       | j                   | _	        yNrm   Fr   r   r   r   T)r(   lowerr   r$   rN   r   r   rs   r   r'   r   ru   r&   r	   r?   rS   r   r)   appropriater/   s      r   r   z'HTMLTokenizer.rcdata_end_tag_name_state  sz    Pv&,,.$2G2G2M2M2OO 	 {{$$&##--$	"D 99DJ0 / S[[--$	"D ::DJ   S[[--$	"D ##%DJ  ]"!!T)!
  OOb!6!6 789KKd#**DJr   c                     | j                   j                         }|dk(  rd| _        | j                  | _        y| j                  d       | j                   j                  |       | j                  | _        yr   )r$   rN   r   rawtext_end_tag_open_stater'   r?   rS   r   r   s     r   r   z*HTMLTokenizer.rawtext_less_than_sign_state  se    {{$$&3;$&D!88DJ
  OOC KKd#++DJr   c                    | j                   j                         }|t        v r'| xj                  |z  c_        | j                  | _        y| j                  d       | j                   j                  |       | j                  | _        yr   )	r$   rN   r	   r   rawtext_end_tag_name_stater'   r?   rS   r   r   s     r   r   z(HTMLTokenizer.rawtext_end_tag_open_state  so    {{$$&= !!T)!88DJ
  OOD!KKd#++DJr   c                 (   | j                   xr8 | j                   d   j                         | j                  j                         k(  }| j                  j	                         }|t
        v r8|r6t        j                  | j                  g dd| _         | j                  | _	        y|dk(  r8|r6t        j                  | j                  g dd| _         | j                  | _	        y|dk(  rH|rFt        j                  | j                  g dd| _         | j                          | j                  | _	        y|t        v r| xj                  |z  c_        y| j                  d| j                          | j                  j                  |       | j                   | _	        yr   )r(   r   r   r$   rN   r   r   rs   r   r'   r   ru   r&   r	   r?   rS   r   r   s      r   r   z(HTMLTokenizer.rawtext_end_tag_name_state  sz    Pv&,,.$2G2G2M2M2OO 	 {{$$&##--$	"D 99DJ0 / S[[--$	"D ::DJ   S[[--$	"D ##%DJ  ]"!!T)!
  OOb!6!6 789KKd#++DJr   c                 >   | j                   j                         }|dk(  rd| _        | j                  | _        y|dk(  r#| j                  d       | j                  | _        y| j                  d       | j                   j                  |       | j                  | _        y)Nr   rC   r   z<!r\   T)	r$   rN   r   script_data_end_tag_open_stater'   r?   script_data_escape_start_staterS   r   r   s     r   r   z.HTMLTokenizer.script_data_less_than_sign_state  s    {{$$&3;$&D!<<DJ  S[OOD!<<DJ
  OOC KKd#//DJr   c                    | j                   j                         }|t        v r'| xj                  |z  c_        | j                  | _        y| j                  d       | j                   j                  |       | j                  | _        yr   )	r$   rN   r	   r   script_data_end_tag_name_stater'   r?   rS   r   r   s     r   r   z,HTMLTokenizer.script_data_end_tag_open_state)  so    {{$$&= !!T)!<<DJ
  OOD!KKd#//DJr   c                 (   | j                   xr8 | j                   d   j                         | j                  j                         k(  }| j                  j	                         }|t
        v r8|r6t        j                  | j                  g dd| _         | j                  | _	        y|dk(  r8|r6t        j                  | j                  g dd| _         | j                  | _	        y|dk(  rH|rFt        j                  | j                  g dd| _         | j                          | j                  | _	        y|t        v r| xj                  |z  c_        y| j                  d| j                          | j                  j                  |       | j                   | _	        yr   )r(   r   r   r$   rN   r   r   rs   r   r'   r   ru   r&   r	   r?   rS   r   r   s      r   r   z,HTMLTokenizer.script_data_end_tag_name_state4  sz    Pv&,,.$2G2G2M2M2OO 	 {{$$&##--$	"D 99DJ0 / S[[--$	"D ::DJ   S[[--$	"D ##%DJ  ]"!!T)!
  OOb!6!6 789KKd#//DJr   c                     | j                   j                         }|dk(  r#| j                  d       | j                  | _        y| j                   j                  |       | j                  | _        yN-T)r$   rN   r?   #script_data_escape_start_dash_stater'   rS   r   r   s     r   r   z,HTMLTokenizer.script_data_escape_start_stateZ  ]    {{$$&3;OOC AADJ  KKd#//DJr   c                     | j                   j                         }|dk(  r#| j                  d       | j                  | _        y| j                   j                  |       | j                  | _        yr   )r$   rN   r?   #script_data_escaped_dash_dash_stater'   rS   r   r   s     r   r   z1HTMLTokenizer.script_data_escape_start_dash_stated  r   r   c                    | j                   j                         }|dk(  r#| j                  d       | j                  | _        y|dk(  r| j
                  | _        y|dk(  r#| j                  d       | j                  d       y|t        u r| j                  | _        y| j                  || j                   j                  d      z          y)Nr   r\   rw   rx   rF   )r\   r   rw   T)
r$   rN   r?   script_data_escaped_dash_stater'   (script_data_escaped_less_than_sign_stater<   r   r&   r|   r   s     r   script_data_escaped_statez'HTMLTokenizer.script_data_escaped_staten  s    {{$$&3;OOC <<DJ  S[FFDJ  X01OOH%
 	 S[DJ  OOD4;;#:#:;O#PPQr   c                    | j                   j                         }|dk(  r#| j                  d       | j                  | _        y|dk(  r| j
                  | _        y|dk(  r4| j                  d       | j                  d       | j                  | _        y|t        u r| j                  | _        y| j                  |       | j                  | _        y)Nr   r\   rw   rx   rF   T)
r$   rN   r?   r   r'   r   r<   r   r   r&   r   s     r   r   z,HTMLTokenizer.script_data_escaped_dash_state~  s    {{$$&3;OOC AADJ  S[FFDJ  X01OOH%77DJ  S[DJ  OOD!77DJr   c                    | j                   j                         }|dk(  r| j                  d       y|dk(  r| j                  | _        y|dk(  r#| j                  d       | j
                  | _        y|dk(  r4| j                  d       | j                  d       | j                  | _        y|t        u r| j                  | _        y| j                  |       | j                  | _        y)Nr   r\   r   rw   rx   rF   T)
r$   rN   r?   r   r'   r   r<   r   r   r&   r   s     r   r   z1HTMLTokenizer.script_data_escaped_dash_dash_state  s    {{$$&3;OOC   S[FFDJ  S[OOC //DJ  X01OOH%77DJ  S[DJ  OOD!77DJr   c                 X   | j                   j                         }|dk(  rd| _        | j                  | _        y|t
        v r-| j                  d|        || _        | j                  | _        y| j                  d       | j                   j                  |       | j                  | _        yr   )
r$   rN   r   &script_data_escaped_end_tag_open_stater'   r	   r?   %script_data_double_escape_start_staterS   r   r   s     r   r   z6HTMLTokenizer.script_data_escaped_less_than_sign_state  s    {{$$&3;$&D!DDDJ  ]"OOavJ'$(D!CCDJ
  OOC KKd#77DJr   c                     | j                   j                         }|t        v r|| _        | j                  | _        y| j                  d       | j                   j                  |       | j                  | _        yr   )	r$   rN   r	   r   &script_data_escaped_end_tag_name_stater'   r?   rS   r   r   s     r   r   z4HTMLTokenizer.script_data_escaped_end_tag_open_state  sf    {{$$&= $(D!DDDJ
  OOD!KKd#77DJr   c                 (   | j                   xr8 | j                   d   j                         | j                  j                         k(  }| j                  j	                         }|t
        v r8|r6t        j                  | j                  g dd| _         | j                  | _	        y|dk(  r8|r6t        j                  | j                  g dd| _         | j                  | _	        y|dk(  rH|rFt        j                  | j                  g dd| _         | j                          | j                  | _	        y|t        v r| xj                  |z  c_        y| j                  d| j                          | j                  j                  |       | j                   | _	        yr   )r(   r   r   r$   rN   r   r   rs   r   r'   r   ru   r&   r	   r?   rS   r   r   s      r   r   z4HTMLTokenizer.script_data_escaped_end_tag_name_state  sz    Pv&,,.$2G2G2M2M2OO 	 {{$$&##--$	"D 99DJ0 / S[[--$	"D ::DJ   S[[--$	"D ##%DJ  ]"!!T)!
  OOb!6!6 789KKd#77DJr   c                    | j                   j                         }|t        t        d      z  v rR| j	                  |       | j
                  j                         dk(  r| j                  | _        y| j                  | _        y|t        v r'| j	                  |       | xj
                  |z  c_        y| j                   j                  |       | j                  | _        yN)r   r   scriptT)r$   rN   r   rQ   r?   r   r    script_data_double_escaped_stater'   r   r	   rS   r   s     r   r   z3HTMLTokenizer.script_data_double_escape_start_state  s    {{$$&$y'<<=OOD!$$**,8!BB
  ";;
  ]"OOD!!!T)!  KKd#77DJr   c                    | j                   j                         }|dk(  r#| j                  d       | j                  | _        y|dk(  r#| j                  d       | j
                  | _        y|dk(  r#| j                  d       | j                  d       y|t        u r#| j                  d       | j                  | _        y| j                  |       yNr   r\   rw   rx   rF   eof-in-script-in-scriptT)	r$   rN   r?   %script_data_double_escaped_dash_stater'   /script_data_double_escaped_less_than_sign_stater<   r   r&   r   s     r   r   z.HTMLTokenizer.script_data_double_escaped_state  s    {{$$&3;OOC CCDJ  S[OOC MMDJ  X01OOH%  S[67DJ  OOD!r   c                    | j                   j                         }|dk(  r#| j                  d       | j                  | _        y|dk(  r#| j                  d       | j
                  | _        y|dk(  r4| j                  d       | j                  d       | j                  | _        y|t        u r#| j                  d       | j                  | _        y| j                  |       | j                  | _        yr   )
r$   rN   r?   *script_data_double_escaped_dash_dash_stater'   r   r<   r   r   r&   r   s     r   r   z3HTMLTokenizer.script_data_double_escaped_dash_state  s    {{$$&3;OOC HHDJ  S[OOC MMDJ  X01OOH%>>DJ  S[67DJ  OOD!>>DJr   c                    | j                   j                         }|dk(  r| j                  d       y|dk(  r#| j                  d       | j                  | _        y|dk(  r#| j                  d       | j
                  | _        y|dk(  r4| j                  d       | j                  d       | j                  | _        y|t        u r#| j                  d       | j                  | _        y| j                  |       | j                  | _        y)	Nr   r\   r   rw   rx   rF   r   T)
r$   rN   r?   r   r'   r   r<   r   r   r&   r   s     r   r   z8HTMLTokenizer.script_data_double_escaped_dash_dash_state  s    {{$$&3;OOC " ! S[OOC MMDJ  S[OOC //DJ  X01OOH%>>DJ  S[67DJ  OOD!>>DJr   c                     | j                   j                         }|dk(  r*| j                  d       d| _        | j                  | _        y| j                   j                  |       | j                  | _        y)Nr   rC   T)r$   rN   r?   r   #script_data_double_escape_end_stater'   rS   r   r   s     r   r   z=HTMLTokenizer.script_data_double_escaped_less_than_sign_state0  se    {{$$&3;OOC $&D!AADJ  KKd#>>DJr   c                    | j                   j                         }|t        t        d      z  v rR| j	                  |       | j
                  j                         dk(  r| j                  | _        y| j                  | _        y|t        v r'| j	                  |       | xj
                  |z  c_        y| j                   j                  |       | j                  | _        yr   )r$   rN   r   rQ   r?   r   r   r   r'   r   r	   rS   r   s     r   r   z1HTMLTokenizer.script_data_double_escape_end_state;  s    {{$$&$y'<<=OOD!$$**,8!;;
  "BB
  ]"OOD!!!T)!  KKd#>>DJr   c                 .   | j                   j                         }|t        v r!| j                   j                  t        d       y|t        v r2| j
                  d   j                  |dg       | j                  | _        y|dk(  r| j                          y|dk(  r| j                  | _        y|dv rC| j                  d       | j
                  d   j                  |dg       | j                  | _        y|dk(  rC| j                  d	       | j
                  d   j                  d
dg       | j                  | _        y|t        u r#| j                  d       | j                  | _        y| j
                  d   j                  |dg       | j                  | _        y)NTr/   rC   r   r   )'"ra   r\   #invalid-character-in-attribute-namerw   rx   rF   z#expected-attribute-name-but-got-eof)r$   rN   r   r|   r	   r(   r9   attribute_name_stater'   ru   r   r<   r   r&   r   s     r   r   z)HTMLTokenizer.before_attribute_name_stateK  s   {{$$&##KK##$4d;, + ]"v&--tRj922DJ& % S[##%" ! S[::DJ  ))BCv&--tRj922DJ  X01v&--xn=22DJ  S[BCDJ  v&--tRj922DJr   c                    | j                   j                         }d}d}|dk(  r| j                  | _        n7|t        v rA| j
                  d   d   dxx   || j                   j                  t        d      z   z  cc<   d}n|dk(  rd}n|t        v r| j                  | _        n|dk(  r| j                  | _        n|d	k(  r1| j                  d
       | j
                  d   d   dxx   dz  cc<   d}n|dv r1| j                  d       | j
                  d   d   dxx   |z  cc<   d}nJ|t        u r#| j                  d       | j                  | _        n| j
                  d   d   dxx   |z  cc<   d}|r| j
                  d   d   d   j                  t              | j
                  d   d   d<   | j
                  d   d d D ]0  \  }}| j
                  d   d   d   |k(  s| j                  d        n |r| j                          y)NTFra   r/   r^   r   r   r   rw   rx   rF   r   r   r\   r   zeof-in-attribute-namezduplicate-attribute)r$   rN   before_attribute_value_stater'   r	   r(   r|   r   after_attribute_name_stater   r<   r   r&   ro   r
   ru   )r)   r/   leaving_this_state
emit_tokenrm   _s         r   r   z"HTMLTokenizer.attribute_name_statef  s   {{$$&!
3;::DJ]"v&r*1-t{{..}dCCE-!&S[ J%%88DJS[::DJX01v&r*1-9-!&_$BCv&r*1-5-!&S[45DJv&r*1-5-!&
 ""6*2.q1;;<PQ v&r*1---f5cr: a%%f-b1!4<$$%:;
 '')r   c                 \   | j                   j                         }|t        v r!| j                   j                  t        d       y|dk(  r| j                  | _        y|dk(  r| j                          y|t        v r2| j                  d   j                  |dg       | j                  | _        y|dk(  r| j                  | _        y|dk(  rC| j                  d       | j                  d   j                  d	dg       | j                  | _        y|d
v rC| j                  d       | j                  d   j                  |dg       | j                  | _        y|t        u r#| j                  d       | j                  | _        y| j                  d   j                  |dg       | j                  | _        y)NTra   r   r/   rC   r   rw   rx   rF   r   z&invalid-character-after-attribute-namezexpected-end-of-tag-but-got-eof)r$   rN   r   r|   r   r'   ru   r	   r(   r9   r   r   r<   r   r&   r   s     r   r   z(HTMLTokenizer.after_attribute_name_state  s   {{$$&##KK##$4d;0 / S[::DJ, + S[##%( ' ]"v&--tRj922DJ" ! S[::DJ  X01v&--xn=22DJ  _$EFv&--tRj922DJ  S[>?DJ  v&--tRj922DJr   c                 \   | j                   j                         }|t        v r!| j                   j                  t        d       y|dk(  r| j                  | _        y|dk(  r-| j                  | _        | j                   j                  |       y|dk(  r| j                  | _        y|dk(  r"| j                  d       | j                          y|dk(  r@| j                  d       | j                  d	   d
   dxx   dz  cc<   | j                  | _        y|dv r@| j                  d       | j                  d	   d
   dxx   |z  cc<   | j                  | _        y|t        u r#| j                  d       | j                  | _        y| j                  d	   d
   dxx   |z  cc<   | j                  | _        y)NTr   r[   r   r   z.expected-attribute-value-but-got-right-bracketrw   rx   r/   r^   r   rF   )ra   r\   `z"equals-in-unquoted-attribute-valuez$expected-attribute-value-but-got-eof)r$   rN   r   r|   #attribute_value_double_quoted_stater'   attribute_value_unquoted_staterS   #attribute_value_single_quoted_stater<   ru   r(   r   r&   r   s     r   r   z*HTMLTokenizer.before_attribute_value_state  s   {{$$&##KK##$4d;2 1 T\AADJ. - S[<<DJKKd#( ' S[AADJ$ # S[MN##%  X01v&r*1-9-<<DJ  _$ABv&r*1-5-<<DJ  S[CDDJ  v&r*1-5-<<DJr   c                    | j                   j                         }|dk(  r| j                  | _        y|dk(  r| j	                  d       y|dk(  r/| j                  d       | j                  d   d   dxx   dz  cc<   y|t        u r#| j                  d	       | j                  | _        y| j                  d   d   dxx   || j                   j                  d
      z   z  cc<   y)Nr   r[   rw   rx   r/   r^   r   rF   z#eof-in-attribute-value-double-quote)r   r[   rw   T
r$   rN   after_attribute_value_stater'   rk   r<   r(   r   r&   r|   r   s     r   r   z1HTMLTokenizer.attribute_value_double_quoted_state  s    {{$$&4<99DJ  S[,,S1  X01v&r*1-9-  S[BCDJ  v&r*1-t{{../DEEG-r   c                    | j                   j                         }|dk(  r| j                  | _        y|dk(  r| j	                  d       y|dk(  r/| j                  d       | j                  d   d   dxx   dz  cc<   y|t        u r#| j                  d	       | j                  | _        y| j                  d   d   dxx   || j                   j                  d
      z   z  cc<   y)Nr   r[   rw   rx   r/   r^   r   rF   z#eof-in-attribute-value-single-quote)r   r[   rw   Tr   r   s     r   r   z1HTMLTokenizer.attribute_value_single_quoted_state  s    {{$$&3;99DJ  S[,,S1  X01v&r*1-9-  S[BCDJ  v&r*1-t{{../CDDF-r   c           	      |   | j                   j                         }|t        v r| j                  | _        y|dk(  r| j                  d       y|dk(  r| j                          y|dv r/| j                  d       | j                  d   d   dxx   |z  cc<   y|dk(  r/| j                  d	       | j                  d   d   dxx   d
z  cc<   y|t        u r#| j                  d       | j                  | _        y| j                  d   d   dxx   || j                   j                  t        d      t        z        z   z  cc<   y)Nr[   r   )r   r   ra   r\   r   z0unexpected-character-in-unquoted-attribute-valuer/   r^   r   rw   rx   rF   z eof-in-attribute-value-no-quotes)r[   r   r   r   ra   r\   r   rw   T)r$   rN   r   r   r'   rk   ru   r<   r(   r   r&   r|   rQ   r   s     r   r   z,HTMLTokenizer.attribute_value_unquoted_state  s[   {{$$&##99DJ& % S[,,S1" ! S[##%  ..OPv&r*1-5-  X01v&r*1-9-  S[?@DJ 	 v&r*1-t{{..KL$%& &'- r   c                    | j                   j                         }|t        v r| j                  | _        y|dk(  r| j                          y|dk(  r| j                  | _        y|t        u r>| j                  d       | j                   j                  |       | j                  | _        y| j                  d       | j                   j                  |       | j                  | _        y)Nr   r   z$unexpected-eof-after-attribute-valuez*unexpected-character-after-attribute-valueT)r$   rN   r   r   r'   ru   r   r   r<   rS   r&   r   s     r   r   z)HTMLTokenizer.after_attribute_value_state  s    {{$$&##99DJ  S[##%  S[::DJ  S[CDKKd#DJ
  IJKKd#99DJr   c                    | j                   j                         }|dk(  r d| j                  d<   | j                          y|t        u r>| j                  d       | j                   j                  |       | j                  | _        y| j                  d       | j                   j                  |       | j                  | _        y)Nr   Trn   z#unexpected-eof-after-solidus-in-tagz)unexpected-character-after-solidus-in-tag)
r$   rN   r(   ru   r   r<   rS   r&   r'   r   r   s     r   r   z*HTMLTokenizer.self_closing_start_tag_state  s    {{$$&3;04D}-##%  S[BCKKd#DJ
  HIKKd#99DJr   c                    | j                   j                  d      }|j                  dd      }| j                  j	                  t
        j                  |d       | j                   j                          | j                  | _	        y)Nr   rw   rF   r-   T)
r$   r|   replacer0   r9   r   COMMENTrN   r&   r'   r   s     r   r   z!HTMLTokenizer.bogus_comment_state-  se     {{&&s+||Hh/ EF 	__
r   c                 F   | j                   j                         g}|d   dk(  r\|j                  | j                   j                                |d   dk(  r|t        j                  dd| _        | j                  | _        y|d   ry|d   dv rrd}dD ];  }|j                  | j                   j                                |d   r|d   |vs9d} n |rt        j                  dd d dd	| _        | j                  | _        y|d   d
k(  r| j                  | j                  j                  j                  r| j                  j                  j                  d   j                  | j                  j                  j                  k7  rRd}dD ]7  }|j                  | j                   j                                |d   |k7  s5d} n |r| j                  | _        y| j!                  d       |r,| j                   j#                  |j%                                |r,| j&                  | _        y)Nr^   r   rC   r-   TdD)oOcCtTyYpPeEF)r.   rm   publicIdsystemIdcorrect[zCDATA[zexpected-dashes-or-doctype)r$   rN   r9   r   r   r(   comment_start_stater'   DOCTYPEdoctype_stater%   treeopen_elements	namespacedefault_namespacecdata_section_stater<   rS   r3   r   )r)   rW   matchedexpecteds       r   r   z+HTMLTokenizer.markup_declaration_open_state;  s   &&()9LL..01RyC.3mmR%H"!55
2Y59,G@ T[[2245RyE"IX$=#G	
 !MM $ $#&" "//
Bi3kk%kk,,kk,,R0::kk001G$ T[[22459(#G	
 !55
56KKeiik* --
r   c                 L   | j                   j                         }|dk(  r| j                  | _        y	|dk(  r)| j	                  d       | j
                  dxx   dz  cc<   y	|dk(  rH| j	                  d       | j                  j                  | j
                         | j                  | _        y	|t        u rH| j	                  d       | j                  j                  | j
                         | j                  | _        y	| j
                  dxx   |z  cc<   | j                  | _        y	)
Nr   rw   rx   r/   rF   r   incorrect-commenteof-in-commentT)r$   rN   comment_start_dash_stater'   r<   r(   r0   r9   r&   r   comment_stater   s     r   r  z!HTMLTokenizer.comment_start_statej  s   {{$$&3;66DJ  X01v&(2&  S[01##D$6$67DJ  S[-.##D$6$67DJ  v&$.&++DJr   c                 R   | j                   j                         }|dk(  r| j                  | _        y	|dk(  r)| j	                  d       | j
                  dxx   dz  cc<   y	|dk(  rH| j	                  d       | j                  j                  | j
                         | j                  | _        y	|t        u rH| j	                  d       | j                  j                  | j
                         | j                  | _        y	| j
                  dxx   d| z  cc<   | j                  | _        y	)
Nr   rw   rx   r/      -�r   r  r  T)r$   rN   comment_end_stater'   r<   r(   r0   r9   r&   r   r  r   s     r   r  z&HTMLTokenizer.comment_start_dash_state~  s   {{$$&3;//DJ  X01v&)3&  S[01##D$6$67DJ  S[-.##D$6$67DJ  v&AdV*4&++DJr   c                    | j                   j                         }|dk(  r| j                  | _        y|dk(  r)| j	                  d       | j
                  dxx   dz  cc<   y|t        u rH| j	                  d       | j                  j                  | j
                         | j                  | _        y| j
                  dxx   || j                   j                  d      z   z  cc<   y)	Nr   rw   rx   r/   rF   r  )r   rw   T)r$   rN   comment_end_dash_stater'   r<   r(   r   r0   r9   r&   r|   r   s     r   r  zHTMLTokenizer.comment_state  s    {{$$&3;44DJ  X01v&(2&  S[-.##D$6$67DJ  v&t{{..??A&r   c                    | j                   j                         }|dk(  r| j                  | _        y|dk(  r:| j	                  d       | j
                  dxx   dz  cc<   | j                  | _        y|t        u rH| j	                  d       | j                  j                  | j
                         | j                  | _        y| j
                  dxx   d| z  cc<   | j                  | _        y)Nr   rw   rx   r/   r  zeof-in-comment-end-dashT)r$   rN   r  r'   r<   r(   r  r   r0   r9   r&   r   s     r   r  z$HTMLTokenizer.comment_end_dash_state  s    {{$$&3;//DJ  X01v&)3&++DJ  S[67##D$6$67DJ  v&AdV*4&++DJr   c                    | j                   j                         }|dk(  r7| j                  j                  | j                         | j
                  | _        y|dk(  r:| j                  d       | j                  dxx   dz  cc<   | j                  | _        y|dk(  r#| j                  d       | j                  | _        y|dk(  r)| j                  d	       | j                  dxx   |z  cc<   y|t        u rH| j                  d
       | j                  j                  | j                         | j
                  | _        y| j                  d       | j                  dxx   d| z  cc<   | j                  | _        y)Nr   rw   rx   r/   u   --�r   z,unexpected-bang-after-double-dash-in-commentr   z,unexpected-dash-after-double-dash-in-commentzeof-in-comment-double-dashzunexpected-char-in-commentz--T)r$   rN   r0   r9   r(   r&   r'   r<   r  comment_end_bang_stater   r   s     r   r  zHTMLTokenizer.comment_end_state  sb   {{$$&3;##D$6$67DJ( ' X01v&*4&++DJ   S[KL44DJ  S[KLv&$.&  S[9:##D$6$67DJ  9:v&Btf+5&++DJr   c                    | j                   j                         }|dk(  r7| j                  j                  | j                         | j
                  | _        y	|dk(  r)| j                  dxx   dz  cc<   | j                  | _        y	|dk(  r:| j                  d       | j                  dxx   dz  cc<   | j                  | _        y	|t        u rH| j                  d       | j                  j                  | j                         | j
                  | _        y	| j                  dxx   d| z  cc<   | j                  | _        y	)
Nr   r   r/   z--!rw   rx   u   --!�zeof-in-comment-end-bang-stateT)r$   rN   r0   r9   r(   r&   r'   r  r<   r  r   r   s     r   r  z$HTMLTokenizer.comment_end_bang_state  s'   {{$$&3;##D$6$67DJ  S[v&%/&44DJ  X01v&+5&++DJ  S[<=##D$6$67DJ  v&Cv,6&++DJr   c                    | j                   j                         }|t        v r| j                  | _        y|t
        u rW| j                  d       d| j                  d<   | j                  j                  | j                         | j                  | _        y| j                  d       | j                   j                  |       | j                  | _        y)N!expected-doctype-name-but-got-eofFr   zneed-space-after-doctypeT)r$   rN   r   before_doctype_name_stater'   r   r<   r(   r0   r9   r&   rS   r   s     r   r  zHTMLTokenizer.doctype_state  s    {{$$&##77DJ  S[@A,1Dy)##D$6$67DJ
  78KKd#77DJr   c                 p   | j                   j                         }|t        v r	 y
|dk(  rW| j                  d       d| j                  d<   | j
                  j                  | j                         | j                  | _        y
|dk(  r2| j                  d       d| j                  d<   | j                  | _        y
|t        u rW| j                  d	       d| j                  d<   | j
                  j                  | j                         | j                  | _        y
|| j                  d<   | j                  | _        y
)Nr   z+expected-doctype-name-but-got-right-bracketFr   rw   rx   rF   rm   r  T)r$   rN   r   r<   r(   r0   r9   r&   r'   doctype_name_stater   r   s     r   r  z'HTMLTokenizer.before_doctype_name_state  s&   {{$$&##$ # S[JK,1Dy)##D$6$67DJ  X01)1Dv&00DJ  S[@A,1Dy)##D$6$67DJ  *.Dv&00DJr   c                 h   | j                   j                         }|t        v rA| j                  d   j	                  t
              | j                  d<   | j                  | _        y	|dk(  rf| j                  d   j	                  t
              | j                  d<   | j                  j                  | j                         | j                  | _        y	|dk(  r:| j                  d       | j                  dxx   dz  cc<   | j                  | _        y	|t        u r| j                  d       d| j                  d<   | j                  d   j	                  t
              | j                  d<   | j                  j                  | j                         | j                  | _        y	| j                  dxx   |z  cc<   y	)
Nrm   r   rw   rx   rF   zeof-in-doctype-nameFr   T)r$   rN   r   r(   ro   r
   after_doctype_name_stater'   r0   r9   r&   r<   r  r   r   s     r   r  z HTMLTokenizer.doctype_name_state  s   {{$$&##""6*445IJ v&66DJ& % S[""6*445IJ v&##D$6$67DJ  X01v&(2&00DJ  S[23,1Dy)""6*445IJ v&##D$6$67DJ  v&$.&r   c                 p   | j                   j                         }|t        v r	 y|dk(  r7| j                  j	                  | j
                         | j                  | _        y|t        u rrd| j
                  d<   | j                   j                  |       | j                  d       | j                  j	                  | j
                         | j                  | _        y|rE|dv rAd}dD ]&  }| j                   j                         }|r||vs$d} n |rY| j                  | _        y|rE|dv rAd}d	D ]&  }| j                   j                         }|r||vs$d} n |r| j                  | _        y| j                   j                  |       | j                  d
|       d| j
                  d<   | j                  | _        y)Nr   Fr   eof-in-doctyper   T)uUbBlLiIr   sS)r   r'  r   r   mMz*expected-space-or-right-bracket-in-doctyper   )r$   rN   r   r0   r9   r(   r&   r'   r   rS   r<   "after_doctype_public_keyword_state"after_doctype_system_keyword_statebogus_doctype_state)r)   r/   r
  r  s       r   r   z&HTMLTokenizer.after_doctype_name_state"  s   {{$$&##P O S[##D$6$67DJJ I S[,1Dy)KKd#-.##D$6$67DJ> ;  > H;;002D4x#7"'	
 !%!H!HDJ$$, > H;;002D4x#7"'	
 !%!H!HDJ KKd#IPTU,1Dy)11DJr   c                    | j                   j                         }|t        v r| j                  | _        y|dv r>| j                  d       | j                   j                  |       | j                  | _        y|t        u rW| j                  d       d| j                  d<   | j                  j                  | j                         | j                  | _        y| j                   j                  |       | j                  | _        yN)r   r   unexpected-char-in-doctyper"  Fr   T)r$   rN   r   &before_doctype_public_identifier_stater'   r<   rS   r   r(   r0   r9   r&   r   s     r   r)  z0HTMLTokenizer.after_doctype_public_keyword_stateO      {{$$&##DDDJ  Z9:KKd#DDDJ  S[-.,1Dy)##D$6$67DJ  KKd#DDDJr   c                    | j                   j                         }|t        v r	 y|dk(  r!d| j                  d<   | j                  | _        y|dk(  r!d| j                  d<   | j                  | _        y|dk(  rW| j                  d       d| j                  d<   | j                  j                  | j                         | j                  | _        y|t        u rW| j                  d	       d| j                  d<   | j                  j                  | j                         | j                  | _        y| j                  d
       d| j                  d<   | j                  | _        y)Nr   rC   r   r   r   unexpected-end-of-doctypeFr   r"  r.  T)r$   rN   r   r(   -doctype_public_identifier_double_quoted_stater'   -doctype_public_identifier_single_quoted_stater<   r0   r9   r&   r   r+  r   s     r   r/  z4HTMLTokenizer.before_doctype_public_identifier_statea  sP   {{$$&##* ) T\-/Dz*KKDJ$ # S[-/Dz*KKDJ  S[89,1Dy)##D$6$67DJ  S[-.,1Dy)##D$6$67DJ
  9:,1Dy)11DJr   c                 f   | j                   j                         }|dk(  r| j                  | _        y|dk(  r)| j	                  d       | j
                  dxx   dz  cc<   y|dk(  rW| j	                  d       d| j
                  d	<   | j                  j                  | j
                         | j                  | _        y|t        u rW| j	                  d
       d| j
                  d	<   | j                  j                  | j
                         | j                  | _        y| j
                  dxx   |z  cc<   y)Nr   rw   rx   r   rF   r   r2  Fr   r"  T
r$   rN   %after_doctype_public_identifier_stater'   r<   r(   r0   r9   r&   r   r   s     r   r3  z;HTMLTokenizer.doctype_public_identifier_double_quoted_state{      {{$$&3;CCDJ   X01z*h6*  S[89,1Dy)##D$6$67DJ  S[-.,1Dy)##D$6$67DJ  z*d2*r   c                 f   | j                   j                         }|dk(  r| j                  | _        y|dk(  r)| j	                  d       | j
                  dxx   dz  cc<   y|dk(  rW| j	                  d       d| j
                  d	<   | j                  j                  | j
                         | j                  | _        y|t        u rW| j	                  d
       d| j
                  d	<   | j                  j                  | j
                         | j                  | _        y| j
                  dxx   |z  cc<   y)Nr   rw   rx   r   rF   r   r2  Fr   r"  Tr6  r   s     r   r4  z;HTMLTokenizer.doctype_public_identifier_single_quoted_state  r8  r   c                    | j                   j                         }|t        v r| j                  | _        y
|dk(  r7| j
                  j                  | j                         | j                  | _        y
|dk(  r2| j                  d       d| j                  d<   | j                  | _        y
|dk(  r2| j                  d       d| j                  d<   | j                  | _        y
|t        u rW| j                  d       d| j                  d	<   | j
                  j                  | j                         | j                  | _        y
| j                  d       d| j                  d	<   | j                  | _        y
)Nr   r   r.  rC   r   r   r"  Fr   T)r$   rN   r   3between_doctype_public_and_system_identifiers_stater'   r0   r9   r(   r&   r<   -doctype_system_identifier_double_quoted_state-doctype_system_identifier_single_quoted_stater   r+  r   s     r   r7  z3HTMLTokenizer.after_doctype_public_identifier_state  sY   {{$$&##QQDJ* ) S[##D$6$67DJ$ # S[9:-/Dz*KKDJ  S[9:-/Dz*KKDJ  S[-.,1Dy)##D$6$67DJ
  9:,1Dy)11DJr   c                 |   | j                   j                         }|t        v r	 y
|dk(  r7| j                  j	                  | j
                         | j                  | _        y
|dk(  r!d| j
                  d<   | j                  | _        y
|dk(  r!d| j
                  d<   | j                  | _        y
|t        u rW| j                  d       d| j
                  d<   | j                  j	                  | j
                         | j                  | _        y
| j                  d	       d| j
                  d<   | j                  | _        y
)Nr   r   rC   r   r   r"  Fr   r.  T)r$   rN   r   r0   r9   r(   r&   r'   r<  r=  r   r<   r+  r   s     r   r;  zAHTMLTokenizer.between_doctype_public_and_system_identifiers_state  s1   {{$$&##& % S[##D$6$67DJ   S[-/Dz*KKDJ  S[-/Dz*KKDJ  S[-.,1Dy)##D$6$67DJ
  9:,1Dy)11DJr   c                    | j                   j                         }|t        v r| j                  | _        y|dv r>| j                  d       | j                   j                  |       | j                  | _        y|t        u rW| j                  d       d| j                  d<   | j                  j                  | j                         | j                  | _        y| j                   j                  |       | j                  | _        yr-  )r$   rN   r   &before_doctype_system_identifier_stater'   r<   rS   r   r(   r0   r9   r&   r   s     r   r*  z0HTMLTokenizer.after_doctype_system_keyword_state  r0  r   c                    | j                   j                         }|t        v r	 y
|dk(  r!d| j                  d<   | j                  | _        y
|dk(  r!d| j                  d<   | j                  | _        y
|dk(  rW| j                  d       d| j                  d<   | j                  j                  | j                         | j                  | _        y
|t        u rW| j                  d	       d| j                  d<   | j                  j                  | j                         | j                  | _        y
| j                  d       d| j                  d<   | j                  | _        y
)Nr   rC   r   r   r   r.  Fr   r"  T)r$   rN   r   r(   r<  r'   r=  r<   r0   r9   r&   r   r+  r   s     r   r@  z4HTMLTokenizer.before_doctype_system_identifier_state  sP   {{$$&##* ) S[-/Dz*KKDJ$ # S[-/Dz*KKDJ  S[9:,1Dy)##D$6$67DJ  S[-.,1Dy)##D$6$67DJ
  9:,1Dy)11DJr   c                 f   | j                   j                         }|dk(  r| j                  | _        y|dk(  r)| j	                  d       | j
                  dxx   dz  cc<   y|dk(  rW| j	                  d       d| j
                  d	<   | j                  j                  | j
                         | j                  | _        y|t        u rW| j	                  d
       d| j
                  d	<   | j                  j                  | j
                         | j                  | _        y| j
                  dxx   |z  cc<   y)Nr   rw   rx   r   rF   r   r2  Fr   r"  T
r$   rN   %after_doctype_system_identifier_stater'   r<   r(   r0   r9   r&   r   r   s     r   r<  z;HTMLTokenizer.doctype_system_identifier_double_quoted_state  s    {{$$&4<CCDJ   X01z*h6*  S[89,1Dy)##D$6$67DJ  S[-.,1Dy)##D$6$67DJ  z*d2*r   c                 f   | j                   j                         }|dk(  r| j                  | _        y|dk(  r)| j	                  d       | j
                  dxx   dz  cc<   y|dk(  rW| j	                  d       d| j
                  d	<   | j                  j                  | j
                         | j                  | _        y|t        u rW| j	                  d
       d| j
                  d	<   | j                  j                  | j
                         | j                  | _        y| j
                  dxx   |z  cc<   y)Nr   rw   rx   r   rF   r   r2  Fr   r"  TrC  r   s     r   r=  z;HTMLTokenizer.doctype_system_identifier_single_quoted_state  r8  r   c                    | j                   j                         }|t        v r	 y|dk(  r7| j                  j	                  | j
                         | j                  | _        y|t        u rW| j                  d       d| j
                  d<   | j                  j	                  | j
                         | j                  | _        y| j                  d       | j                  | _        y)Nr   r"  Fr   r.  T)r$   rN   r   r0   r9   r(   r&   r'   r   r<   r+  r   s     r   rD  z3HTMLTokenizer.after_doctype_system_identifier_state-  s    {{$$&##  S[##D$6$67DJ  S[-.,1Dy)##D$6$67DJ  9:11DJr   c                 f   | j                   j                         }|dk(  r7| j                  j                  | j                         | j
                  | _        y|t        u rR| j                   j                  |       | j                  j                  | j                         | j
                  | _        y	 y)Nr   T)	r$   rN   r0   r9   r(   r&   r'   r   rS   r   s     r   r+  z!HTMLTokenizer.bogus_doctype_state>  s    {{$$&3;##D$6$67DJ  S[KKd###D$6$67DJ  r   c                 T   g }	 |j                  | j                  j                  d             |j                  | j                  j                  d             | j                  j                         }|t        u rn0|dk(  sJ |d   dd  dk(  r|d   d d |d<   n|j                  |       dj                  |      }|j                  d      x}d	kD  r3t        |      D ]  }| j                  d
        |j                  dd      }|r| j                  |       | j                  | _        y)NT]r   r^   z]]rC   rw   r   rx   rF   )r9   r$   r|   rN   r   rP   countr   r<   r   r?   r&   r'   )r)   r/   char
null_countr   s        r   r	  z!HTMLTokenizer.cdata_section_stateL  s   KK//45KK//45;;((*Ds{s{"{8BC=D(#Bx}DHKK%  wwt}**X..J!3:& 6  !456<<(3DOOD!__
r   r#   )NF)O__name__
__module____qualname____doc__r+   r6   r<   r?   rY   ri   rk   ru   r&   rz   r   r   r   r   r   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r  r   r)  r/  r3  r4  r7  r;  r*  r@  r<  r=  rD  r+  r	   r   r   r!   r!   &   s   "1('K3jEKNB%86
8
	!F0(		$L		$L	$L $(	$L $(,	 6/b:<""0$-^(( "4*.4+Z$4**40$4**"r   r!   N)bisectr   collectionsr   html.entitiesr   r   	constantsr   r   r	   r
   r   r   r   r   r   inputstreamr   tuplesortedr   r   r   r!   rR  r   r   <module>rZ     sG      +
 
 
 )F8$%- r   