#!/usr/bin/python

# united_comics.py
#       --copyright--                   Copyright 2007 (C) Tranzoa, Co. All rights reserved.    Warranty: You're free and on your own here. This code is not necessarily up-to-date or of public quality.
#       --url--                         http://www.tranzoa.net/tzpython/
#       --email--                       pycode is the name to send to. tranzoa.com is the place to send to.
#       --bodstamps--
#       July 31, 2004           bar
#       August 8, 2004          bar     strip all duped blank lines
#       March 21, 2005          bar     whack old .bak files
#       November 18, 2007       bar     turn on doxygen
#       November 27, 2007       bar     insert boilerplate copyright
#       May 17, 2008            bar     email adr
#       May 27, 2012            bar     doxygen namespace
#       September 15, 2013      bar     specify the .bak base as 10
#       --eodstamps--
##      \file
#       \namespace              tzpython.united_comics
#
#
#       Get the current URLs for all the United Features comics image files.
#
#


import  re
import  string

import  tzlib
import  url_getter
import  replace_file


__all__     = [
                'get_united_comics_urls',
                'get_united_gif_urls',
                'get_new_united_comics',
              ]


#   Base URL of the United Media (United Features) comics web site.
united_comics_url   = "http://www.unitedmedia.com"


#   Matches the VALUE attribute of each <OPTION> element in the comics
#   drop-down menu on the alphabetical categories page.
#   group(1) is the site-relative URL of one comic's page.
comics_urls         = re.compile(r"<OPTION\s+VALUE=\"([^\"]+)\"",   re.DOTALL + re.IGNORECASE)

#   Matches the "Attachments=" query parameter inside a comic's page.
#   group(1) is the site-relative URL of the comic's GIF image file.
gif_url             = re.compile(r"\&Attachments=([^\&]+)\&",       re.DOTALL + re.IGNORECASE)


def get_united_comics_urls() :
    """
        Get the URLs for all the current United Features comics.

        Reads the site's alphabetical categories page and returns a list
        of absolute URL strings, one per comic's page.
        Returns an empty list if the page cannot be fetched.
    """

    urls      = []

    main_page = url_getter.url_open_read_with_timeout(united_comics_url + "/mycomics/html/categories_alpha.html")
    if  main_page is not None :
        #   findall() always returns a list (possibly empty), never None,
        #   so no separate None check is needed.
        urls = [ str(united_comics_url + u) for u in comics_urls.findall(main_page) ]

    return  urls




def get_united_gif_urls() :
    """
        Get the URLs for all the current United Features comics GIF files.

        Fetches each comic's page and pulls out the URL of its image
        file. Pages that cannot be fetched, or that contain no image
        reference, are silently skipped.
    """

    gif_urls = []

    for comic_url in get_united_comics_urls() :
        page = url_getter.url_open_read_with_timeout(comic_url)
        if  page is None :
            continue                    # couldn't fetch this comic's page

        m = gif_url.search(page)
        if  m is not None :
            gif_urls.append(united_comics_url + m.group(1))

    return  gif_urls



def get_new_united_comics(current_urls = None) :
    """
        Given an array of United Features comics URLs, get any new ones that are on-line now.

    """

    if  current_urls == None :  current_urls = []

    curls    = tzlib.make_dictionary(map(string.strip, current_urls))

    urls     = get_united_gif_urls()
    if  len(urls) == 0 :
        print "No United Features URLs found!"

    new_urls = []
    for url in urls :
        url = url.strip()
        if  len(url) and not curls.has_key(url) :
            new_urls.append(url)
            curls[url] = True
        pass

    return(new_urls)


#
#
#
if __name__ == '__main__' :
    """

        python united_comics file_name      Put URLs of new comics at the end of the file.
        python united_comics                Print all the comics' URLs.

        If a command line parameter is given, it's a file name.

        The file is updated with the URLs of whatever new comics'
        images are not listed in the file.

        Otherwise, this program simply prints all the comics' URLs.

    """

    import  sys

    old = []            # lines (URLs and HTML) read from the existing file, if any
    fn  = None          # file name from the command line, or None to just print
    if  len(sys.argv) > 1 :
        fn = sys.argv[1]

        #   NOTE(review): open() raises IOError on failure rather than
        #   returning a false value, so this check looks like dead
        #   defensive code - confirm before relying on the exit(101) path.
        f  = open(fn, "r")
        if  not f :
            print "Cannot open", fn
            sys.exit(101)

        #   Read the whole file, right-stripping each line and collapsing
        #   every run of blank lines down to a single blank line.
        blank = False
        while True :
            s = f.readline()
            if  not s : break

            s = s.rstrip()

            if  len(s.strip()) != 0 :
                old.append(s)
                blank = False
            else :
                #   Keep only the first blank line of a consecutive run.
                if  not blank :
                    old.append(s)
                blank = True
            pass

        f.close()

    #   Find on-line comics URLs that aren't already listed in 'old'.
    urls = get_new_united_comics(old)

    if  fn == None :

        print urls

    else :
        #   Rebuild the file in a temp file: the old content, an <HR>
        #   separator, then one image+link section per new comic URL.
        tn   = fn + "_tmp"
        f    = open(tn, "w")

        for u in old :
            print >> f, u

        print >> f
        print >> f, "<HR>"
        print >> f

        for u in urls :
            print >> f, "<IMG    SRC=\"%s\">" % ( u )
            print >> f, "<BR><A HREF=\"%s\">" % ( u )
            print >> f,  "             %s"    % ( u )
            print >> f, "</A><HR>"
            print >> f

        f.close()

        #   Swap the temp file in for the original, keeping a serially
        #   numbered (decimal, per base = 10) .bak backup of the old
        #   file, then whack outdated backup files.
        replace_file.replace_file(fn, tn, fn + "_NNNNN.bak", base = 10)

        replace_file.erase_old_N_files(fn + "_NNNNN.bak")

    pass


#
#
#
# eof
