ó
ø°äRc           @   sÑ  d  Z  d d l Z d d l Z d d l m Z d d l m Z d d l m	 Z	 d d l
 m Z m Z m Z y
 e Z Wn e k
 r“ e e f Z n Xy d d l m Z Wn! e k
 rË d d l m Z n Xy d d l m Z Wn! e k
 rd d l m Z n Xd	 e f d
 „  ƒ  YZ y d d l m Z Wn e k
 rAn  Xd e f d „  ƒ  YZ e ƒ  Z d „  Z e d d „ Z e  e  d d „ Z! e  e  d d „ Z" e d d „ Z# e d d „ Z$ d „  Z% e ƒ  Z& d S(   s?   
An interface to html5lib that mimics the lxml.html interface.
iÿÿÿÿN(   t
   HTMLParser(   t   TreeBuilder(   t   etree(   t   _contains_block_level_tagt   XHTML_NAMESPACEt   Element(   t   urlopen(   t   urlparseR    c           B   s   e  Z d  Z e d „ Z RS(   s*   An html5lib HTML parser with lxml as tree.c         K   s    t  j |  d | d t | d  S(   Nt   strictt   tree(   t   _HTMLParsert   __init__R   (   t   selfR   t   kwargs(    (    s9   /usr/lib/python2.7/dist-packages/lxml/html/html5parser.pyR      s    (   t   __name__t
   __module__t   __doc__t   FalseR   (    (    (    s9   /usr/lib/python2.7/dist-packages/lxml/html/html5parser.pyR       s   (   t   XHTMLParserR   c           B   s   e  Z d  Z e d „ Z RS(   s+   An html5lib XHTML Parser with lxml as tree.c         K   s    t  j |  d | d t | d  S(   NR   R	   (   t   _XHTMLParserR   R   (   R   R   R   (    (    s9   /usr/lib/python2.7/dist-packages/lxml/html/html5parser.pyR   +   s    (   R   R   R   R   R   (    (    (    s9   /usr/lib/python2.7/dist-packages/lxml/html/html5parser.pyR   (   s   c         C   s6   |  j  | ƒ } | d  k	 r | S|  j  d t | f ƒ S(   Ns   {%s}%s(   t   findt   NoneR   (   R	   t   tagt   elem(    (    s9   /usr/lib/python2.7/dist-packages/lxml/html/html5parser.pyt	   _find_tag1   s    c         C   sL   t  |  t ƒ s t d ƒ ‚ n  | d k r3 t } n  | j |  d | ƒj ƒ  S(   s%   Parse a whole document into a string.s   string requiredt
   useChardetN(   t
   isinstancet   _stringst	   TypeErrorR   t   html_parsert   parset   getroot(   t   htmlt   guess_charsett   parser(    (    s9   /usr/lib/python2.7/dist-packages/lxml/html/html5parser.pyt   document_fromstring8   s
    	c         C   s¥   t  |  t ƒ s t d ƒ ‚ n  | d k r3 t } n  | j |  d d | ƒ} | r¡ t  | d t ƒ r¡ | r¡ | d j ƒ  r” t j d | d ƒ ‚ n  | d =q¡ n  | S(   s”  Parses several HTML elements, returning a list of elements.

    The first item in the list may be a string.  If no_leading_text is true,
    then it will be an error if there is leading text, and it will always be
    a list of only elements.

    If `guess_charset` is `True` and the text was not unicode but a
    bytestring, the `chardet` library will perform charset guessing on the
    string.
    s   string requiredt   divR   i    s   There is leading text: %rN(	   R   R   R   R   R   t   parseFragmentt   stripR   t   ParserError(   R    t   no_leading_textR!   R"   t   children(    (    s9   /usr/lib/python2.7/dist-packages/lxml/html/html5parser.pyt   fragments_fromstringC   s    		c         C   s;  t  |  t ƒ s t d ƒ ‚ n  t | ƒ } t |  d | d | d | ƒ} | r· t  | t ƒ sg d } n  t | ƒ } | r³ t  | d t ƒ r£ | d | _ | d =n  | j | ƒ n  | S| sÏ t j	 d ƒ ‚ n  t
 | ƒ d k ró t j	 d	 ƒ ‚ n  | d } | j r.| j j ƒ  r.t j	 d
 | j ƒ ‚ n  d | _ | S(   sX  Parses a single HTML element; it is an error if there is more than
    one element, or if anything but whitespace precedes or follows the
    element.

    If create_parent is true (or is a tag name) then a parent node
    will be created to encapsulate the HTML in a single element.  In
    this case, leading or trailing text is allowed.
    s   string requiredR!   R"   R(   R$   i    s   No elements foundi   s   Multiple elements founds   Element followed by text: %rN(   R   R   R   t   boolR*   R   t   textt   extendR   R'   t   lent   tailR&   R   (   R    t   create_parentR!   R"   t   accept_leading_textt   elementst   new_roott   result(    (    s9   /usr/lib/python2.7/dist-packages/lxml/html/html5parser.pyt   fragment_fromstring_   s2    

	

	c         C   s  t  |  t ƒ s t d ƒ ‚ n  t |  d | d | ƒ} |  d  j ƒ  j ƒ  } | j d ƒ sj | j d ƒ rn | St | d ƒ } t | ƒ r | St | d ƒ } t | ƒ d	 k rò | j	 sÈ | j	 j
 ƒ  rò | d
 j sê | d
 j j
 ƒ  rò | d St | ƒ r
d | _ n	 d | _ | S(   sü   Parse the html, returning a single element/document.

    This tries to minimally parse the chunk of text, without knowing if it
    is a fragment or a document.

    base_url will set the document's base_url attribute (and the tree's docinfo.URL)
    s   string requiredR"   R!   i2   s   <htmls	   <!doctypet   headt   bodyi   iÿÿÿÿi    R$   t   span(   R   R   R   R#   t   lstript   lowert
   startswithR   R.   R,   R&   R/   R   R   (   R    R!   R"   t   doct   startR6   R7   (    (    s9   /usr/lib/python2.7/dist-packages/lxml/html/html5parser.pyt
   fromstringˆ   s$    	,"	c         C   sj   | d k r t } n  t |  t ƒ s- |  } n* t |  ƒ rH t |  ƒ } n t |  d ƒ } | j | d | ƒS(   s·   Parse a filename, URL, or file-like object into an HTML document
    tree.  Note: this returns a tree, not an element.  Use
    ``parse(...).getroot()`` to get the document root.
    t   rbR   N(   R   R   R   R   t   _looks_like_urlR   t   openR   (   t   filename_url_or_fileR!   R"   t   fp(    (    s9   /usr/lib/python2.7/dist-packages/lxml/html/html5parser.pyR   ²   s    		c         C   sV   t  |  ƒ d } | s t St j d k rN | t j k rN t | ƒ d k rN t St Sd  S(   Ni    t   win32i   (   R   R   t   syst   platformt   stringt   ascii_lettersR.   t   True(   t   strt   scheme(    (    s9   /usr/lib/python2.7/dist-packages/lxml/html/html5parser.pyR@   Â   s    ('   R   RE   RG   t   html5libR    R
   t    html5lib.treebuilders.etree_lxmlR   t   lxmlR   t	   lxml.htmlR   R   R   t
   basestringR   t	   NameErrort   bytesRJ   t   urllib2R   t   ImportErrort   urllib.requestR   t   urllib.parseR   R   t   xhtml_parserR   RI   R   R#   R   R*   R5   R>   R   R@   R   (    (    (    s9   /usr/lib/python2.7/dist-packages/lxml/html/html5parser.pyt   <module>   sF   
		(*	