#!/usr/bin/python

# info_sniper_geolocate_ip_adr.py
#       --copyright--                   Copyright 2012 (C) Tranzoa, Co. All rights reserved.    Warranty: You're free and on your own here. This code is not necessarily up-to-date or of public quality.
#       --url--                         http://www.tranzoa.net/tzpython/
#       --email--                       pycode is the name to send to. tranzoa.com is the place to send to.
#       --bodstamps--
#       March 27, 2012          bar
#       May 27, 2012            bar     doxygen namespace
#       --eodstamps--
##      \file
#       \namespace              tzpython.info_sniper_geolocate_ip_adr
#
#
#       Get Info Sniper's geolocation information about an IP address.
#
#

import  re
import  time
import  urllib2

import  tzlib
import  url_getter


#   Build a urllib2 opener, empty its list of default headers and install it as
#   the module-wide default opener.  This runs at import time and so affects
#   every later use of urllib2's default opener in the process.
opener            = urllib2.build_opener()
opener.addheaders = []                  # get rid of 'User-agent' the only way that seems to work (yes, I tried lower-casing 'Agent')
urllib2.install_opener(opener)



def get_request(req, timeout = None) :
    """
        Add a fixed set of browser-like headers to 'req' and fetch it.

        req         A request object with an add_header(name, value) method
                    (the caller in this file passes a urllib2.Request).
        timeout     Passed straight through to
                    url_getter.url_open_read_with_timeout().

        Returns whatever url_getter.url_open_read_with_timeout() returns.
    """

    #   Sent in this order.  Accept-Encoding, Keep-Alive and Connection are not sent.
    browser_headers = (
        ( 'User-Agent',      'Mozilla/5.0 (Windows; U; Win98; en-US; rv:1.5) Gecko/20031007' ),
        ( 'Accept',          'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,image/jpeg,image/gif;q=0.2,*/*;q=0.1' ),
        ( 'Accept-Language', 'en-us,en;q=0.5' ),
        ( 'Accept-Charset',  'ISO-8859-1,utf-8;q=0.7,*;q=0.7' ),
    )

    for header_name, header_value in browser_headers :
        req.add_header(header_name, header_value)

    return(url_getter.url_open_read_with_timeout(req, timeout))




#   Scraper for the result page.  The pattern is four sections, each made of a
#   "content-td1" label cell followed by four captured "content-td2" value cells,
#   giving 16 capture groups in page order.  Each capture is the cell text up to
#   the next '<'.
#
#   Note that only the first label tolerates arbitrary white space after "<td";
#   the other three labels require exactly one space, as in the original pattern.
parse_regx  = re.compile(
                            "".join(
                                      label_regx + (4 * r""".*?<td\s+class="content-td2">([^<]*)""")
                                      for label_regx in (
                                                          r""".*?<td\s+class="content-td1">IP Address</td>""",
                                                          r""".*?<td class="content-td1">Provider</td>""",
                                                          r""".*?<td class="content-td1">Hostname</td>""",
                                                          r""".*?<td class="content-td1">Timezone</td>""",
                                                        )
                                   ),
                            re.DOTALL
                        )




class   a_location(object) :
    """
        Geolocation facts about one IP address, scraped from an Info Sniper result page.

        Every attribute (ip_adr, provider, hostname, timezone, city, state,
        country, continent, lat, lon, tld, dma_code, area_code, postal_code,
        gmt_offset) is either a cleaned-up string or None when no usable
        value was supplied.
    """

    @staticmethod
    def _fix_val(v)     :
        """
            Normalize one scraped value.

            False-ish values, values that are empty after clean-up, and values
            starting with "n/a" (any letter case) yield None.  Otherwise the
            value is run through tzlib.decode_html_entities(), stripped, and
            has inner runs of white space collapsed to single spaces.
        """
        if  not v       :
            return(None)

        try             :
            v           = str(v)
        except UnicodeError :
            pass                                # Python 2 unicode text holding non-ASCII characters: keep it as it is instead of crashing

        v               = tzlib.decode_html_entities(v, ' ').strip()
        v               = re.sub(r"\s+", " ", v)
        if  v.lower().startswith("n/a") :
            return(None)

        return(v or None)                       # a cell that was nothing but white space has no value either


    def __init__(me, ip_adr, provider, hostname, timezone, city, state, country, continent, lat, lon, tld, dma_code, area_code, postal_code, gmt_offset) :
        """ Store each argument after cleaning it with _fix_val(). """
        fix             = a_location._fix_val

        me.ip_adr       = fix(ip_adr)
        me.provider     = fix(provider)
        me.hostname     = fix(hostname)
        me.timezone     = fix(timezone)
        me.city         = fix(city)
        me.state        = fix(state)
        me.country      = fix(country)
        me.continent    = fix(continent)
        me.lat          = fix(lat)
        me.lon          = fix(lon)
        me.tld          = fix(tld)
        me.dma_code     = fix(dma_code)
        me.area_code    = fix(area_code)
        me.postal_code  = fix(postal_code)
        me.gmt_offset   = fix(gmt_offset)


    def __str__(me) :
        """
            Return a one-line "Label: value" summary of all fields.

            Missing values are shown as '_'.  Text is returned UTF-8 encoded.
        """
        s   = " ".join([
                        "IP: %s"            % (me.ip_adr             or '_'),
                        "Provider: %s"      % (me.provider           or '_'),
                        "Host: %s"          % (me.hostname           or '_'),
                        "TLD: %s"           % (me.tld                or '_'),
                        "City: %s"          % (me.city               or '_'),
                        "State: %s"         % (me.state              or '_'),
                        "Country: %s"       % (me.country            or '_'),
                        "Continent: %s"     % (me.continent          or '_'),
                        "DMA: %s"           % (me.dma_code           or '_'),
                        "Area code: %s"     % (me.area_code          or '_'),
                        "Postal code: %s"   % (me.postal_code        or '_'),
                        "TZ: %s"            % (me.timezone           or '_'),
                        "GMT offset: %s"    % (me.gmt_offset         or '_'),
                        "Lat/lon: %s, %s"   % (me.lat or '_', me.lon or '_'),
                      ])
        s   = re.sub(r"\s+", " ", s)

        if  isinstance(s, bytes) :
            return(s)                           # already a byte string (Python 2 str): .encode() would first decode it as ASCII and raise on non-ASCII bytes

        return(s.encode('utf8'))                # str() doesn't like unicode characters


    @staticmethod
    def parse(htm) :
        """
            Build an a_location from the text of a result page.

            htm     The page contents.  It is decoded as UTF-8 when possible
                    and searched as-is otherwise.

            Returns None if 'htm' is empty or does not match parse_regx.
        """
        if  not htm :
            return(None)

        try         :
            htm     = htm.decode('utf8')
        except UnicodeError :
            pass                                # not decodable: search the text as it was given

        g           = parse_regx.search(htm)
        if  not g   :
            return(None)

        #   The 16 captures come in page order, four per labelled section.  Capture 3 is not used.
        return(a_location(
                            ip_adr      = g.group(1),
                            city        = g.group(2),
                            dma_code    = g.group(4),
                            provider    = g.group(5),
                            state       = g.group(6),
                            lat         = g.group(7),
                            area_code   = g.group(8),
                            hostname    = g.group(9),
                            country     = g.group(10),
                            lon         = g.group(11),
                            postal_code = g.group(12),
                            timezone    = g.group(13),
                            continent   = g.group(14),
                            tld         = g.group(15),
                            gmt_offset  = g.group(16),
                         ))


    @staticmethod
    def get(ip_adr, retries = 0, timeout = None, verbose = 0) :
        """
            Download the Info Sniper page for 'ip_adr' and parse it.

            ip_adr      Substituted verbatim into the query URL (it is not URL-escaped).
            retries     How many additional attempts to make, 3 seconds apart,
                        when a download yields nothing.  None counts as 0.
            timeout     Handed to get_request().  False-ish values become None.
            verbose     Accepted for interface compatibility; currently unused.

            Returns an a_location, or None when nothing was downloaded or the
            page could not be parsed.
        """
        retries     = retries or 0
        timeout     = timeout or None

        url         = "http://www.infosniper.net/index.php?ip_address=%s" % ip_adr
        failures    = 0
        while True  :
            req     = urllib2.Request(url)
            f       = get_request(req, timeout = timeout)
            if  f   :
                break
            failures   += 1
            if  failures > retries :            # 1 initial try plus 'retries' retries have all failed
                break
            time.sleep(3)

        return(a_location.parse(f))

    #   a_location


#   Disabled developer check.  When the condition is changed to True, importing or
#   running this file reads the file 'x.y', parses it with a_location.parse(),
#   prints the result and exits with status 1 instead of doing anything else.
if  False :
    fd  = tzlib.read_whole_binary_file('x.y')
    loc = a_location.parse(fd)
    print loc

    exit(1)




if  __name__ == '__main__' :
    import  os
    import  random
    import  sys

    import  TZCommandLineAtFile


    program_name    = sys.argv.pop(0)

    TZCommandLineAtFile.expand_at_sign_command_line_files(sys.argv)

    timeout     =   None
    retries     =   0
    wait_time   =   0
    verbose     =   0

    if  (tzlib.array_find(sys.argv, "--help") >= 0) or (tzlib.array_find(sys.argv, "-h") >= 0) or (tzlib.array_find(sys.argv, "-?") >= 0) :
        print """
%s  Get IP address geolocation.

--timeout       seconds Set the web-bit timeout.
--retries       cnt     Set the number of retries.
--delay         seconds How long to delay between each download.

""" % ( os.path.basename(program_name), )
        sys.exit(254)



    while True :
        oi  = tzlib.array_find(sys.argv, [ "--timeout", "--to", "-t", "-w" ])
        if  oi < 0 :    break
        del sys.argv[oi]
        timeout         = int(sys.argv.pop(oi))

    while True :
        oi  = tzlib.array_find(sys.argv, [ "--delay", "-d" ])
        if  oi < 0 :    break
        del sys.argv[oi]
        wait_time       = int(sys.argv.pop(oi))

    while True :
        oi  = tzlib.array_find(sys.argv, [ "--retries", "-r" ])
        if  oi < 0 :    break
        del sys.argv[oi]
        retries         = int(sys.argv.pop(oi))

    while True :
        oi  = tzlib.array_find(sys.argv, [ "--verbose", "-v" ])
        if  oi < 0 :    break
        del sys.argv[oi]
        if  not verbose :
            verbose     = 0
        verbose        += 1


    adrs    = sys.argv

    adrs    = tzlib.without_dupes(sys.argv)

    ot      = tzlib.elapsed_time() - wait_time

    while len(adrs) > 0 :

        wt  = random.random() * wait_time + wait_time / 2
        while tzlib.elapsed_time() < ot + wt :
            time.sleep(0.11)

        adr = adrs.pop(0)

        if  adr[0:1] == '-' :
            print "Did you mean for", adr, "to be a command line parameter?"

        loc = a_location.get(adr, retries = retries, timeout = timeout, verbose = verbose)
        print "adr:", loc

        ot  = tzlib.elapsed_time()

    pass


#
#
#
# eof
