#!/usr/bin/python

# motley_fool_caps.py
#       --copyright--                   Copyright 2009 (C) Tranzoa, Co. All rights reserved.    Warranty: You're free and on your own here. This code is not necessarily up-to-date or of public quality.
#       --url--                         http://www.tranzoa.net/tzpython/
#       --email--                       pycode is the name to send to. tranzoa.com is the place to send to.
#       --bodstamps--
#       January 1, 2009         bar
#       January 2, 2009         bar
#       February 5, 2009        bar     don't crash on new page
#       November 29, 2011       bar     pyflake cleanup
#       May 27, 2012            bar     doxygen namespace
#       --eodstamps--
##      \file
#       \namespace              tzpython.motley_fool_caps
#
#
#       Get or update information from the Motley Fool's CAPS pages for given symbol(s).
#
#
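#
#       Example command line (illustrative; the symbols and option values are arbitrary):
#
#               python motley_fool_caps.py AAPL MSFT --output_dir caps_data --delay 30 --show_info
#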

import  os
import  re
import  time
import  urllib
import  urllib2

import  tzlib
import  url_getter


opener            = urllib2.build_opener()
opener.addheaders = []                  # get rid of 'User-agent' the only way that seems to work (yes, I tried lower-casing 'Agent')
urllib2.install_opener(opener)



def get_request(req, timeout = None) :
    """
        Add browser-like headers to the request, then read the page, honoring the timeout.
    """

    req.add_header('User-Agent',       'Mozilla/5.0 (Windows; U; Win98; en-US; rv:1.5) Gecko/20031007')
    req.add_header('Accept',           'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,image/jpeg,image/gif;q=0.2,*/*;q=0.1')
    req.add_header('Accept-Language',  'en-us,en;q=0.5')
    # req.add_header('Accept-Encoding',  'gzip,deflate')
    req.add_header('Accept-Charset',   'ISO-8859-1,utf-8;q=0.7,*;q=0.7')
    # req.add_header('Keep-Alive',       '300')
    # req.add_header('Connection',       'keep-alive')

    # An alternative, non-browser header set:
    #   req.add_header('User-Agent',       'TZYBrowser/00.01 Graph/01.00 Text/01.00 Gen/01.00')
    #   req.add_header('Accept',           'text/html,image/png,image/jpeg,image/gif,image/bmp,image/jpg')
    #   req.add_header('Accept-Language',  'en-us,en')
    #   req.add_header('Accept-Charset',   'ISO-8859-1,utf-8')

    f   = url_getter.url_open_read_with_timeout(req, timeout)

    return(f)
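# Illustrative use of get_request() (assumes, from the way get_htm() below uses the
# result, that url_getter.url_open_read_with_timeout() returns the page body as a
# string, or something false-y on failure):
#
#   req = urllib2.Request("http://caps.fool.com/Ticker/AAPL.aspx")
#   htm = get_request(req, timeout = 30)
#   if  htm :
#       print len(htm), "bytes retrieved"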




def clean_symbol(sym) :
    """
        Clean up the company symbol as best we can.
    """

    if  sym :
        sym   = re.sub(r"[^a-zA-Z\.\^\-]", "", sym).strip().upper()

    return(sym)
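# e.g.  clean_symbol(" brk.b \n")   returns "BRK.B"
#       clean_symbol("^gspc")       returns "^GSPC"
#       clean_symbol("$$$")         returns ""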




def _re_int_get(regx, s) :
    """
        Search s with regx and return capture group 1 as an int (non-digits stripped),
        or None if there is no match or no digits.
    """

    g   = regx.search(s)
    if  not g :
        return(None)

    s   = re.sub(r"[^\d]", "", g.group(1))
    try :
        r   = int(s)
    except ValueError :
        return(None)

    return(r)
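# e.g. (with a made-up pattern)  _re_int_get(re.compile(r">([\d,]+)<"), "<b>12,345</b>")  returns 12345
#      With the patterns below, a rating_re match of "Unrated" yields None (no digits to convert).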





class   a_caps(object) :
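    """
        Holds the CAPS rating and recommendation counts scraped from one ticker's page.
    """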

    rating_re               = re.compile(r'<img\s+id\s*=\s*"ctl00_cphMaster_ctlTickerSummaryHeader_imgStarRating"\s+title\s*=\s*"(\d Stars|Unrated)',   re.DOTALL)
    total_members_out_re    = re.compile(r'<span\s+id\s*=\s*"ctl00_cphMaster_allstars_lblOutPerformFoolsInGame">(\d+)</span>',                  re.DOTALL)
    total_members_und_re    = re.compile(r'<span\s+id\s*=\s*"ctl00_cphMaster_allstars_lblUnderPerformFoolsInGame">(\d+)</span>',                re.DOTALL)
    allstars_out_re         = re.compile(r'<span\s+id\s*=\s*"ctl00_cphMaster_allstars_lblAllStarOutPerformFoolsInGame">(\d+)</span>',           re.DOTALL)
    allstars_und_re         = re.compile(r'<span\s+id\s*=\s*"ctl00_cphMaster_allstars_lblAllStarUnderPerformFoolsInGame">(\d+)</span>',         re.DOTALL)
    wallstreet_out_re       = re.compile(r'<span\s+id\s*=\s*"ctl00_cphMaster_allstars_lblVirtualOutPerformFoolsInGame">(\d+)</span>',           re.DOTALL)
    wallstreet_und_re       = re.compile(r'<span\s+id\s*=\s*"ctl00_cphMaster_allstars_lblVirtualUnderPerformFoolsInGame">(\d+)</span>',         re.DOTALL)

    collab_re               = re.compile(r'<div\s+id\s*=\s*"divCollaborativeFilter">(.*?)</div>',   re.DOTALL)
    bullish_re              = re.compile(r'<h4>Fools bullish(.*?)</div>',                           re.DOTALL)
    bearish_re              = re.compile(r'<h4>Fools bearish(.*?)</div>',                           re.DOTALL)
    aspx_re                 = re.compile(r'\shref\s*=\s*([^\.]*?)\.aspx>',                          re.DOTALL)



    class   a_recommend(object) :
        """
            Out-perform / under-perform vote counts.
        """
        def __init__(me, out = 0, under = 0) :
            me.out   = out
            me.under = under
        def __str__(me) :
            return("[%s,%s]" % ( str(me.out), str(me.under) ) )
        pass


    def __init__(me, sym, rating, total_members, allstars, wallstreet, bulls, bears) :

        me.sym              = sym

        me.rating           = rating
        me.total_members    = total_members
        me.allstars         = allstars
        me.wallstreet       = wallstreet
        me.bulls            = bulls
        me.bears            = bears


    def __str__(me) :
        return("Sym=%s Rating=%s Total=%s Allstars=%s Wallstreet=%s Bulls=%s Bears=%s" % ( me.sym, str(me.rating), str(me.total_members), str(me.allstars), str(me.wallstreet), str(me.bulls), str(me.bears) ) )


    @staticmethod
    def from_htm(htm, sym) :

        rating          = _re_int_get(a_caps.rating_re, htm)
        total_members   = a_caps.a_recommend(_re_int_get(a_caps.total_members_out_re, htm), _re_int_get(a_caps.total_members_und_re, htm))
        allstars        = a_caps.a_recommend(_re_int_get(a_caps.allstars_out_re,      htm), _re_int_get(a_caps.allstars_und_re,      htm))
        wallstreet      = a_caps.a_recommend(_re_int_get(a_caps.wallstreet_out_re,    htm), _re_int_get(a_caps.wallstreet_und_re,    htm))

        collabs         = a_caps.collab_re.findall(htm)
        if  len(collabs) != 2 :
            # The page doesn't have the two expected collaborative-filter divs.
            # Fall back to the bullish/bearish section headers; give up if neither is found.
            collabs     = [ "", "" ]
            bullcollab  = a_caps.bullish_re.findall(htm)
            if  bullcollab :
                collabs[0]  = bullcollab[0]
            bearcollab  = a_caps.bearish_re.findall(htm)
            if  bearcollab :
                collabs[1]  = bearcollab[0]
            if  not collabs[0] and not collabs[1] :
                raise ValueError("Unrecognized CAPS page layout for " + str(sym))
            pass
        bulls           = a_caps.aspx_re.findall(collabs[0])
        bears           = a_caps.aspx_re.findall(collabs[1])

        return(a_caps(sym, rating, total_members, allstars, wallstreet, bulls, bears))

    pass    # a_caps
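# Illustrative use of a_caps (the field values shown are made up):
#
#   cap = a_caps.from_htm(htm, "AAPL")
#   print cap                                   # Sym=AAPL Rating=4 Total=[3210,145] Allstars=[812,22] Wallstreet=[31,2] Bulls=[...] Bears=[...]
#   print cap.total_members.out, cap.total_members.under
#   for b in cap.bulls :                        # bulls/bears are the link targets (presumably CAPS player pages) found in the pitch sections
#       print b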





def get_htm(sym, timeout = None, show_info = False) :
    """
        Get the information from a page from the web.
    """

    sym  = clean_symbol(sym)

    if  sym :

        url_sym = urllib.quote(sym)

        url     = "http://caps.fool.com/Ticker/%s.aspx" % url_sym.upper()

        if  show_info :
            print "Getting .csv for", sym, " ", url

        htm     = get_request(urllib2.Request(url), timeout)

        if  htm :
            return(htm)

        if  show_info :
            print "No .htm retrieved for", sym, str(htm)
        pass

    return(None)
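# get_htm() builds http://caps.fool.com/Ticker/<SYMBOL>.aspx and returns the raw page text,
# or None when the symbol is empty after cleaning or the fetch fails.  Illustrative:
#
#   htm = get_htm("brk.b", timeout = 30, show_info = True)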



def add_to_file(fname, sym, ot, caps) :
    """
        Append a time-stamped line for 'sym' to fname, creating the file if it does not exist.
    """
    if  os.path.isfile(fname) :
        fo  = open(fname, "a")
    else :
        fo  = open(fname, "w")

    print >> fo, "%11u %s :: %s" % ( ot, time.asctime(time.gmtime(ot)), caps )

    fo.close()
    del(fo)

    print caps



if  __name__ == '__main__' :
    import  glob
    import  random
    import  sys
    import  time

    if  len(sys.argv) < 2 :

        print   "Tell me a company symbol"

    else :

        import  TZCommandLineAtFile

        del(sys.argv[0])

        TZCommandLineAtFile.expand_at_sign_command_line_files(sys.argv)

        show_info   =   False
        timeout     =   None
        retries     =   0
        output_dir  =   ""
        wait_time   =   0
        update_all  =   False
        days_ago    =   0
        days_range  =   0

        if  (tzlib.array_find(sys.argv, "--help") >= 0) or (tzlib.array_find(sys.argv, "-h") >= 0) or (tzlib.array_find(sys.argv, "-?") >= 0) :
            print """
motley_fool_caps        Get Fool CAPS info for symbol(s) and add it to a file.

--output_dir    dir             Directory to put the output file(s) in. (output file name is lower_case_symbol.txt)
--show_info                     Print progress/debugging info.
--timeout       seconds         Set the web-hit timeout.
--retries       cnt             Set the number of web-hit retries.
--delay         seconds         How long to delay between each download.
--update_all                    Update all txt files in 'output_dir'.
--days_ago      days range      Only re-hit a symbol whose file is older than 'days' days, randomized by plus-or-minus 'range' days (implies --update_all).

"""
            sys.exit(254)



        while True :
            oi  = tzlib.array_find(sys.argv, "--timeout")
            if  oi < 0 :    break
            del sys.argv[oi]
            timeout         = int(sys.argv.pop(oi))

        while True :
            oi  = tzlib.array_find(sys.argv, "--delay")
            if  oi < 0 :    break
            del sys.argv[oi]
            wait_time       = int(sys.argv.pop(oi))

        while True :
            oi  = tzlib.array_find(sys.argv, "--retries")
            if  oi < 0 :    break
            del sys.argv[oi]
            retries         = int(sys.argv.pop(oi))

        while True :
            oi  = tzlib.array_find(sys.argv, "--show_info")
            if  oi < 0 :    break
            del sys.argv[oi]
            show_info      += 1

        while True :
            oi  = tzlib.array_find(sys.argv, "--output_dir")
            if  oi < 0 :    break
            del sys.argv[oi]
            output_dir      =   sys.argv.pop(oi)

        while True :
            oi  = tzlib.array_find(sys.argv, "--update_all")
            if  oi < 0 :    break
            del sys.argv[oi]
            update_all      = True

        while True :
            oi  = tzlib.array_find(sys.argv, "--days_ago")
            if  oi < 0 :    break
            del sys.argv[oi]
            days_ago        = float(sys.argv.pop(oi)) * 60 * 60 * 24
            days_range      = float(sys.argv.pop(oi)) * 60 * 60 * 24
            update_all      = True


        if  output_dir      :  output_dir   = os.path.normpath(output_dir)

        if  not output_dir  :  output_dir   = "."

        if  not os.path.isdir(output_dir)   :   os.makedirs(output_dir)

        syms    = sys.argv

        if  update_all :
            syms   +=   map(lambda s : s[len(output_dir) + 1:-4], glob.glob(os.path.join(output_dir, "*.txt")))

        symsh   = tzlib.make_dictionary([ s.replace("_", "^").lower() for s in syms ])
        syms    = symsh.keys()

        ot      = time.time() - wait_time

        while len(syms) > 0 :

            random.shuffle(syms)                                        # shuffle them each time through the loop as we may have added some and we're really in no hurry

            wt  = random.random() * wait_time + wait_time / 2
            while time.time() < ot + wt :
                time.sleep(0.11)

            sym = syms.pop(0)

            if  sym[0:1] == '-' :
                print "Did you mean for", sym, "to be a command line parameter?"

            ot      =   time.time()

            fname   = os.path.join(output_dir, "%s.txt" % tzlib.file_name_able(sym.lower()) )

            rt      = days_ago + ((random.random() - 0.5) * days_range * 2)

            if  (not update_all) or (not os.path.isfile(fname)) or (ot - os.path.getmtime(fname) >= rt) :

                print "Doing", sym

                tfn = "/tmp/%s.htm" % sym
                if  False and os.path.isfile(tfn) :                     # debugging: flip to True to read a locally cached copy instead of hitting the web
                    htm = tzlib.read_whole_binary_file(tfn)
                else :
                    htm = get_htm(sym, timeout = timeout, show_info = show_info)

                if  not htm :

                    print "  No htm for", sym
                    add_to_file(fname, sym, ot, "")                     # put a place holder in

                else :

                    # tzlib.write_whole_binary_file(tfn, htm)
                    try :
                        cap     = a_caps.from_htm(htm, sym)
                    except ValueError :
                        cap     = None

                    if  cap :
                        add_to_file(fname, sym, ot, str(cap))

                        if  update_all :
                            for s in cap.bulls + cap.bears :
                                s   = s.lower()
                                if  s not in symsh :
                                    symsh[s]    = True
                                    syms.append(s)
                                pass
                            pass

                        pass
                    else :
                        print "  new htm, probably, for", sym
                    pass
                pass
            pass

        pass

    pass


#
#
#
# eof
