
\[\4                 @   sd   d  Z  d d l Z d d l Z d g Z Gd d   d  Z Gd d   d  Z Gd d   d  Z d S)	a%   robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://www.robotstxt.org/norobots-rfc.txt
    NRobotFileParserc               @   s   e  Z d  Z d Z d d d  Z d d   Z d d   Z d	 d
   Z d d   Z d d   Z	 d d   Z
 d d   Z d d   Z d S)r   zs This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

     c             C   s>   g  |  _  d  |  _ d |  _ d |  _ |  j |  d |  _ d  S)NFr   )entriesdefault_entrydisallow_all	allow_allset_urllast_checked)selfurl r   (/usr/lib/python3.4/urllib/robotparser.py__init__   s    				zRobotFileParser.__init__c             C   s   |  j  S)zReturns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        )r	   )r
   r   r   r   mtime   s    zRobotFileParser.mtimec             C   s   d d l  } | j    |  _ d S)zYSets the time the robots.txt file was last fetched to the
        current time.

        r   N)timer	   )r
   r   r   r   r   modified(   s    zRobotFileParser.modifiedc             C   s5   | |  _  t j j |  d d  \ |  _ |  _ d S)z,Sets the URL referring to a robots.txt file.      N)r   urllibparseurlparseZhostpath)r
   r   r   r   r   r   0   s    	zRobotFileParser.set_urlc             C   s   y t  j j |  j  } Wnp t  j j k
 r } zJ | j d k rO d |  _ n* | j d k ry | j d k  ry d |  _ n  WYd d } ~ Xn) X| j	   } |  j
 | j d  j    d S)	z4Reads the robots.txt URL and feeds it to the parser.    Ti  i  Nzutf-8)r   r   )r   ZrequestZurlopenr   errorZ	HTTPErrorcoder   r   readr   decode
splitlines)r
   ferrrawr   r   r   r   5   s    zRobotFileParser.readc             C   sA   d | j  k r- |  j d  k r= | |  _ q= n |  j j |  d  S)N*)
useragentsr   r   append)r
   entryr   r   r   
_add_entryB   s    zRobotFileParser._add_entryc             C   s  d } t    } |  j   x| D]} | sx | d k rJ t    } d } qx | d k rx |  j |  t    } d } qx n  | j d  } | d k r | d |  } n  | j   } | s q  n  | j d d  } t |  d k r  | d j   j   | d <t j	 j
 | d j    | d <| d d k rn| d k rQ|  j |  t    } n  | j j | d  d } q| d d k r| d k r| j j t | d d	   d } qq| d d
 k r| d k r| j j t | d d   d } qqq  q  W| d k r|  j |  n  d S)zParse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        r   r      #N:z
user-agentZdisallowFZallowT)Entryr   r&   findstripsplitlenlowerr   r   unquoter#   r$   	rulelinesRuleLine)r
   linesstater%   lineir   r   r   r   K   sL    
	
			 	zRobotFileParser.parsec             C   s   |  j  r d S|  j r d S|  j s' d St j j t j j |   } t j j d d | j | j	 | j
 | j f  } t j j |  } | s d } n  x- |  j D]" } | j |  r | j |  Sq W|  j r |  j j |  Sd S)z=using the parsed robots.txt decide if useragent can fetch urlFTr   /)r   r   r	   r   r   r   r0   
urlunparser   ZparamsZqueryZfragmentquoter   
applies_to	allowancer   )r
   	useragentr   Z
parsed_urlr%   r   r   r   	can_fetch   s$    					zRobotFileParser.can_fetchc             C   s   d j  d d   |  j D  S)Nr   c             S   s    g  |  ] } t  |  d   q S)
)str).0r%   r   r   r   
<listcomp>   s   	 z+RobotFileParser.__str__.<locals>.<listcomp>)joinr   )r
   r   r   r   __str__   s    zRobotFileParser.__str__N)__name__
__module____qualname____doc__r   r   r   r   r   r&   r   r=   rC   r   r   r   r   r      s   		4c               @   s:   e  Z d  Z d Z d d   Z d d   Z d d   Z d S)	r2   zoA rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path.c             C   s\   | d k r | r d } n  t  j j t  j j |   } t  j j |  |  _ | |  _ d  S)Nr   T)r   r   r8   r   r9   r   r;   )r
   r   r;   r   r   r   r      s
    	zRuleLine.__init__c             C   s   |  j  d k p | j |  j   S)Nr"   )r   
startswith)r
   filenamer   r   r   r:      s    zRuleLine.applies_toc             C   s   |  j  r d p d d |  j S)NZAllowZDisallowz: )r;   r   )r
   r   r   r   rC      s    zRuleLine.__str__N)rD   rE   rF   rG   r   r:   rC   r   r   r   r   r2      s   r2   c               @   sF   e  Z d  Z d Z d d   Z d d   Z d d   Z d d	   Z d
 S)r*   z?An entry has one or more user-agents and zero or more rulelinesc             C   s   g  |  _  g  |  _ d  S)N)r#   r1   )r
   r   r   r   r      s    	zEntry.__init__c             C   sj   g  } x' |  j  D] } | j d | d g  q Wx* |  j D] } | j t |  d g  q: Wd j |  S)NzUser-agent: r>   r   )r#   extendr1   r?   rB   )r
   Zretagentr5   r   r   r   rC      s    zEntry.__str__c             C   s]   | j  d  d j   } x= |  j D]2 } | d k r9 d S| j   } | | k r# d Sq# Wd S)z2check if this entry applies to the specified agentr7   r   r"   TF)r-   r/   r#   )r
   r<   rK   r   r   r   r:      s    zEntry.applies_toc             C   s.   x' |  j  D] } | j |  r
 | j Sq
 Wd S)zZPreconditions:
        - our agent applies to this entry
        - filename is URL decodedT)r1   r:   r;   )r
   rI   r5   r   r   r   r;      s    zEntry.allowanceN)rD   rE   rF   rG   r   rC   r:   r;   r   r   r   r   r*      s
   r*   )rG   Zurllib.parser   Zurllib.request__all__r   r2   r*   r   r   r   r   <module>   s
   	