#!/usr/bin/python

# album_art.py
#       --copyright--                   Copyright 2007 (C) Tranzoa, Co. All rights reserved.    Warranty: You're free and on your own here. This code is not necessarily up-to-date or of public quality.
#       --url--                         http://www.tranzoa.net/tzpython/
#       --email--                       pycode is the name to send to. tranzoa.com is the place to send to.
#       --bodstamps--
#       March 2, 2005           bar
#       March 7, 2005           bar     protection code
#       November 18, 2007       bar     turn on doxygen
#       November 27, 2007       bar     insert boilerplate copyright
#       May 17, 2008            bar     email adr
#       May 27, 2012            bar     doxygen namespace
#       March 5, 2023           bar     future print
#       --eodstamps--
##      \file
#       \namespace              tzpython.album_art
#
#
#       Get album covers from somewhere or other.
#
#

from    __future__  import  print_function

import  re
import  sys
import  urllib
import  urllib2

import  url_getter


# Names this module intends to offer to other code.
# The entries need separating commas: without them Python joins the adjacent
# string literals into a single bogus name.
# NOTE(review): Python only honors the lower-case spelling `__all__` for
# `from album_art import *`; the upper-case name is kept here so that what a
# star-import exposes does not change. Confirm before renaming.
__ALL__ = [
            'get_urls_from_htm',
            'get_urls_from_net',
            'get_images_from_urls',
          ]


# Import-time side effect: build a urllib2 opener, empty its default header
# list, and install it as urllib2's global opener. The three statements must
# run in this order (build, clear, install).
# NOTE(review): presumably url_getter opens requests through urllib2's
# installed opener, which is what makes this take effect - confirm in url_getter.
opener            = urllib2.build_opener()
opener.addheaders = []                  # get rid of 'User-agent' the only way that seems to work (yes, I tried lower-casing 'Agent')
urllib2.install_opener(opener)


# Table of cover-art sources. Each entry is a 3-item list:
#   [0] query URL template     (%A==artist %L==album %T==title)
#   [1] regex whose group 1 selects the part of the page worth scanning
#   [2] regex whose group 1 captures each image URL inside that part
# Items [1] and [2] are written as pattern strings and are replaced by their
# compiled forms by the loop below when the module is imported.
hosts   =   [
                [
                    'http://www.slothradio.com/covers/?adv=0&artist=%A&album=%L',
                    r"(.*)",                                                                        # all of it
                    r"width=\"305\"\s+valign=\"top\"><img\s+src=\"([^\"]+)",                        # each image URL
                ],

            ]


# Sanity-check the table and compile the patterns in place.
# A forgotten comma after a regex makes two string literals merge, which
# shows up here as an entry of the wrong length.
for entry in hosts :
    if  len(entry) != 3 :
        print("Put the commas in after the regxes!")
        sys.exit(101)
    entry[1:] = [ re.compile(pattern, re.DOTALL | re.IGNORECASE) for pattern in entry[1:] ]


def do_request(req, timeout = None) :
    """
        Add this module's fixed set of request headers to 'req' and fetch it.

        req         request object providing add_header(name, value)
                    (callers in this file pass urllib2.Request instances)
        timeout     handed unchanged to url_getter.url_open_read_with_timeout()

        Returns whatever url_getter.url_open_read_with_timeout() returns.
    """

    fixed_headers   = (
        ('User-Agent',       'LABrowser/00.01 Graph/01.00 Text/01.00 Gen/01.00'),
        ('Accept',           'text/html,image/jpeg,image/png,image/gif,image/bmp,image/jpg'),
        ('Accept-Language',  'en-us,en'),
        ('Accept-Charset',   'ISO-8859-1,utf-8'),
    )
    for name, value in fixed_headers :
        req.add_header(name, value)

    return(url_getter.url_open_read_with_timeout(req, timeout))


def get_urls_from_htm(htm) :
    """
        Pull image URLs out of a page's text using every entry in 'hosts'.

        htm         page content; anything false (None, empty) yields no URLs

        Returns a list of the strings captured by each host's image regex,
        in host order, searched only within the part of the page that the
        host's section regex captures as group 1.
    """

    found   = []
    if  not htm :
        return(found)

    for host in hosts :
        section_rx  = host[1]
        image_rx    = host[2]

        section     = section_rx.search(htm)
        if  not section :
            continue                    # this host's section pattern is not on the page

        found.extend(image_rx.findall(section.group(1)))

    return(found)


def get_urls_from_net(album, artist = None, title = None, timeout = None) :
    """
        Query every entry in 'hosts' for cover art and collect the image URLs found.

        album       album name (None is treated as "")
        artist      artist name (None is treated as "")
        title       track title (None is treated as "")
        timeout     passed to do_request() for each host query

        Returns a list of image URL strings; empty if all three search
        fields are empty, in which case no request is made.
    """

    urls        = []

    if  artist is None :    artist  = ""
    if  album  is None :    album   = ""
    if  title  is None :    title   = ""

    if  not (len(album) or len(artist) or len(title)) :
        return(urls)                    # nothing to search for

    artist      = urllib.quote_plus(artist)
    title       = urllib.quote_plus(title)
    album       = urllib.quote_plus(album)

    for h in hosts :
        # %A is filled in first: percent-escapes in the quoted values can look
        # like '%A0', so replacing %A after another field has been inserted
        # could corrupt that field.
        q       = h[0]
        q       = q.replace('%A', artist)
        q       = q.replace('%L', album)
        q       = q.replace('%T', title)

        # Forward the caller's timeout - it used to be accepted but dropped here.
        htm     = do_request(urllib2.Request(q), timeout)

        urls   += get_urls_from_htm(htm)

    return(urls)


def get_images_from_urls(urls) :
    """
        Fetch each URL and pair the result with a guessed file extension.

        urls        iterable of URL strings

        Returns a list of [ fetched, extension ] two-item lists, one per URL,
        where 'fetched' is what do_request() returned and 'extension' is one
        of ".jpg", ".gif", ".png", ".bmp" or ".unk" when no known marker
        appears anywhere in the URL.
    """

    # Checked top to bottom; the first marker found anywhere in the URL wins.
    marker_to_ext   = (
        (".jpeg",   ".jpg"),
        (".jpg",    ".jpg"),
        (".gif",    ".gif"),
        (".png",    ".png"),
        (".bmp",    ".bmp"),
    )

    fetched = []

    for url in urls :
        data    = do_request(urllib2.Request(url))

        ext     = ".unk"
        for marker, mapped in marker_to_ext :
            if  marker in url :
                ext = mapped
                break

        fetched.append( [ data, ext ] )

    return(fetched)


if  __name__ == '__main__' :

    # Command-line driver. Each argument is either a saved .htm/.html page,
    # which is scanned locally for image URLs, or an album name, which is
    # looked up on the configured hosts. Either way the resulting image URLs
    # are then fetched and each image's length and guessed extension printed.

    if  len(sys.argv) < 2 :

        print(  "Tell me a album name (or a .htm file).")

    else :

        import  TZCommandLineAtFile

        del(sys.argv[0])                # drop the script name; only real arguments remain

        # NOTE(review): presumably expands @file arguments in place - defined outside this file.
        TZCommandLineAtFile.expand_at_sign_command_line_files(sys.argv)

        while   len(sys.argv) > 0 :

            ss  = sys.argv.pop(0)

            if  ss.endswith(".htm") or ss.endswith(".html") :

                # Close the file deterministically instead of leaking the handle.
                with open(ss, "rb") as fi :
                    htm = fi.read()

                urls    = get_urls_from_htm(htm)

                print(urls)

            else :
                urls    = get_urls_from_net(ss)

            images      = get_images_from_urls(urls)

            for i in images :
                print(len(i[0]), i[1])


#
#
#
# eof