#!/usr/bin/python

# TextToGoogleImages.py
#       --copyright--                   Copyright 2007 (C) Tranzoa, Co. All rights reserved.    Warranty: You're free and on your own here. This code is not necessarily up-to-date or of public quality.
#       --url--                         http://www.tranzoa.net/tzpython/
#       --email--                       pycode is the name to send to. tranzoa.com is the place to send to.
#       --bodstamps--
#       March 3, 2005           bar
#       March 4, 2005           bar     --html option
#       March 19, 2005          bar     give 'em back the whole image information
#       March 19, 2005          bar     --help, etc
#                                       cache results
#                                       MAX_WORDS of 8 for better binary searching
#       March 20, 2005          bar     more safe coding
#                                       allow size to be specified for montage images
#       March 22, 2005          bar     handle no images situation
#       November 18, 2007       bar     turn on doxygen
#       November 27, 2007       bar     insert boilerplate copyright
#       May 17, 2008            bar     email adr
#       May 27, 2012            bar     doxygen namespace
#       --eodstamps--
##      \file
#       \namespace              tzpython.TextToGoogleImages
#
#
#       Convert text to Google image urls.
#
#
#       TODO:
#
#           return all the info, not just the urls
#
#           binary search 1..10 rather than starting from 10 words
#
#

import  random
import  re
import  string

import  GoogleSearch
import  image_overlay_montage
import  url_getter


# Names intended to be this module's public interface.
# NOTE(review): "from module import *" only consults the lower-case name __all__,
#               so the interpreter does not use this upper-case list. Confirm whether
#               that is intended before renaming it - a rename would narrow what
#               "import *" hands out.
__ALL__ = [
            'convert_text_to_google_image_urls',
            'convert_short_text_to_google_image_urls',
            'make_montage_image_from_urls',

            'MAX_WORDS',
          ]


# Phrase-length setting convert_text_to_google_image_urls() uses when the caller does not pass max_words.
MAX_WORDS   =   8


def google_textify(txt) :
    """
        Reduce text to the characters this module puts in a Google query.

        In this order:
            underscores become dashes,
            double quotes become single quotes,
            each run of control characters (000..037 octal) becomes one space,
            everything that is not a space, digit, ASCII letter, single quote or dash is removed.

        Returns the result with leading and trailing white space stripped.
    """

    # The order matters: control characters must turn in to spaces before the final rule would delete them.
    substitutions   = (
                        ( r"_",                "-" ),
                        ( r"\"",               "'" ),
                        ( r"[\000-\037]+",     " " ),
                        ( r"[^ 0-9a-zA-Z\'-]", ""  ),
                      )

    cleaned = txt
    for pattern, replacement in substitutions :
        cleaned = re.sub(pattern, replacement, cleaned)

    return cleaned.strip()


results = {}                    # cache of fetched Google result pages, shared by every call in this process

def convert_text_to_google_image_urls(txt, safe = True, rnd_image = False, number_of_results = None, max_words = None) :
    """
        Given a string, return Google image information for it.

        The text is cleaned with google_textify() and split in to words.
        Working from the front of the text, a binary search looks for the longest
        leading phrase for which a quoted Google image query finds any image.
        (The search takes it that if a phrase finds images, so does any shorter
        leading phrase.) The phrase found is consumed and the search starts over
        on the words that remain. If not even the first word finds an image,
        that one word is dropped.

        txt                 Text to convert.
        safe                Handed to the querier's set_safe_search().
        rnd_image           True picks one of the phrase's images at random. False takes the first.
                            When number_of_results is None, False also asks the querier for 1 result.
        number_of_results   If not None, handed to the querier's set_number_of_results().
        max_words           Most words to try as one phrase. None means MAX_WORDS.
                            The value is clamped to 1..MAX_WORDS.

        Returns a list with one entry per phrase, in text order:

            [ image_info, words, page_count ]

        image_info is one item of the querier's find_image_info() result and
        words is the phrase as a string. A dropped word's entry is

            [ None, [ word ], 0 ]

        (the word is inside a one-item list).
    """

    if  max_words is None : max_words   = MAX_WORDS
    max_words                           = max(max_words, 1)
    max_words                           = min(max_words, MAX_WORDS)     # this was max(), which pushed every caller's value up to MAX_WORDS

    googler = GoogleSearch.a_google_html_querier()

    googler.set_safe_search(safe)

    if  number_of_results is not None :
        googler.set_number_of_results(number_of_results)
    elif    not rnd_image :
        googler.set_number_of_results(1)

    urls    = []

    words   = google_textify(txt).split()

    while   words :
        fii = None              # image info of the longest phrase found so far
        fi  = 0                 # how many words are in that phrase
        fw  = None              # the phrase itself
        fc  = 0                 # the querier's page count for the phrase

        low = 0
        hi  = min(max_words, len(words))
        while   hi > low :

            mid         = (low + hi) // 2

            wrds        = " ".join(words[0:mid + 1])

            # Everything that changes what the query gives back is in the key,
            # so a page fetched without safe search is never handed to a safe search call.
            rk          = ( safe, number_of_results, rnd_image, wrds )
            htm         = results.get(rk)
            if  not htm :                       # an empty/false page is not trusted - it is fetched again
                htm     = googler.do_image_query("\"" + wrds + "\"", clean_the_query = True)
                results[rk] = htm

            image_info  = googler.find_image_info(htm)

            if  len(image_info) > 0 :
                # 'low' only goes up, so each success is a longer phrase than the last one. Always keep it.
                # (The old "mid > fi" test compared a word index to a word count and
                #  threw away a success that was exactly one word longer than the previous one.)
                fii     = image_info
                fi      = mid + 1
                fw      = wrds
                # NOTE(review): on a cache hit no query was made for this phrase - confirm
                #               that page_count() still answers for this page.
                fc      = googler.page_count()
                low     = mid + 1
            else :
                hi      = mid
            pass

        if  fii is None :
            urls.append( [ None, words[0:1], 0 ] )      # dropped word has no image information and the count is zero
            words   = words[1:]                         # nothing found for even the first word, so just drop it and go on
        else :
            i                 = 0
            if  rnd_image : i = random.randint(0, len(fii) - 1)

            urls.append( [ fii[i], fw, fc ] )

            words   = words[fi:]

        pass


    return(urls)                # return array of [ GoogleSearch_images_array([6]), words, page_count ]




def convert_short_text_to_google_image_urls(txt, min_url_count = None, safe = True, rnd_image = False) :
    """
        Convert a string to Google image information, trying for at least min_url_count entries.

        txt             Text to convert.
        min_url_count   How many entries to try for. None means 8.
        safe            Handed on to convert_text_to_google_image_urls().
        rnd_image       Handed on to convert_text_to_google_image_urls() for the 1st pass only.
                        Every later pass asks for random images.

        The 1st pass converts the text as is. While the count is short, more random-image
        passes are added on. Then entries sharing the same first image-info field are
        collapsed to one, and further passes are made until the count is met or
        a round leaves the count where it was.

        Returns a list of [ image_info, words, page_count ] entries.
        Entries for dropped words (no image info) are never included.
        The list can come back shorter than min_url_count.
    """

    def found_only(entries) :
        """ Keep only the entries that carry image information. """
        return [ entry for entry in entries if entry[0] is not None ]

    if  min_url_count is None :
        min_url_count   = 8

    urls    = found_only(convert_text_to_google_image_urls(txt, safe, rnd_image))

    # Pad with random picks. Dupes are allowed at this stage.
    while len(urls) < min_url_count :
        more    = found_only(convert_text_to_google_image_urls(txt, safe, True, None, 1))
        if  not more :
            break
        urls.extend(more)

    phrase_words    = 2
    prev_count      = 0
    while True :

        # Keyed on the first image-info field, so later entries replace earlier ones that share it.
        by_first    = dict(( entry[0][0], entry ) for entry in urls)
        urls        = list(by_first.values())

        if  (len(urls) >= min_url_count) or (len(urls) == prev_count) :
            break                                               # enough of 'em, or the last round added nothing new

        prev_count  = len(urls)

        urls.extend(found_only(convert_text_to_google_image_urls(txt, safe, True, None, phrase_words)))

        if  phrase_words > 1 :
            phrase_words   -= 1

    return urls




def make_montage_image_from_urls(urls, twidth = None, thite = None, width = None, hite = None) :
    """
        Given a return value from convert_text_to_google_image_urls(),
        return a montage Image or None, if there are no images.

        urls            List of [ image_info, words, page_count ] entries, or None.
                        Entries whose image_info is None (dropped words) are skipped.
        twidth, thite,
        width,  hite    Handed unchanged to image_overlay_montage.make_montage_image_from_images().

        The first field of each image_info is fetched with url_getter.url_open_read_with_timeout().
        Fetches that come back None are left out.
        Nothing is fetched if no entry has image information.
    """

    # Dropped words come through as [ None, words, 0 ]. They have nothing to fetch,
    # so leave them out rather than die indexing None.
    usable      = [ u for u in (urls or []) if u[0] is not None ]
    if  not usable :
        return(None)

    rimgs       = url_getter.url_open_read_with_timeout([ u[0][0] for u in usable ])

    images      = []
    for ri, u in enumerate(usable) :
        img     = rimgs[ri]

        if  img is not None :
            images.append( [ u[2], img ] )              # the google page count and the image

    if  not images :
        return(None)

    # Ascending by page count only: ties keep their input order and image data is never compared.
    # (Original note: this makes the lowest page count image the last one laid down in the
    #  montage - that depends on how image_overlay_montage layers the list.)
    images.sort(key = lambda v : v[0])

    return(image_overlay_montage.make_montage_image_from_images([ v[1] for v in images ], twidth, thite, width, hite))









if  __name__ == '__main__' :
    #
    #   Command line driver: text (from --text, a file or stdin) to a list of links, an HTML table or a montage image file.
    #
    import  sys

    import  TZCommandLineAtFile
    import  tzlib


    help_str = """
python TextToGoogleImages (options)   in_file   out_file
python TextToGoogleImages (options) --text text out_file

Convert text from input file or stdin to Google images.

--real_images           Use the real, full image links.
--safe                  Use 'safe' search.
--random                Use random image if there is a choice.
--html                  Output links in IMG tags in HTML TABLE.
--htm                   Output links in IMG tags in HTML TABLE.
--montage               Output montage image file composed of images.
--short min_url_cnt     Treat text as 'short' text and insure at least given URL count.
--text  text            Do the given text (one allowed - no input file/stdin used).

"""


    def _take_flag(name) :
        """ Remove every occurrence of the flag from sys.argv. Return True if there was at least one. """
        found   = False
        while True :
            oi  = tzlib.array_find(sys.argv, name)
            if  oi < 0 :    break
            del sys.argv[oi]
            found   = True
        return(found)


    def _take_value(name) :
        """ Remove every 'name value' pair from sys.argv. Return the last value, or None if the option is not there. """
        value   = None
        while True :
            oi  = tzlib.array_find(sys.argv, name)
            if  oi < 0 :    break
            del sys.argv[oi]
            if  oi >= len(sys.argv) :
                print("%s needs a value!" % name)
                sys.exit(101)
            value   = sys.argv.pop(oi)          # the value now sits where the option was - not at the front of the command line
        return(value)


    del(sys.argv[0])

    TZCommandLineAtFile.expand_at_sign_command_line_files(sys.argv)

    for hlp in ( "--help", "-h", "-?" ) :
        if  tzlib.array_find(sys.argv, hlp) >= 0 :
            print(help_str)
            sys.exit(254)
        pass


    #   Options are pulled in this fixed order, flags before the options that take a value.
    real_images         = _take_flag("--real_images")
    safe                = _take_flag("--safe")
    do_rand             = _take_flag("--random")

    short_cnt           = _take_value("--short")
    if  short_cnt is not None :
        try :
            short_cnt   = int(short_cnt)
        except ValueError :
            print("--short needs a number, not '%s'!" % short_cnt)
            sys.exit(101)
        pass

    do_html             = _take_flag("--html")
    do_html             = _take_flag("--htm") or do_html
    do_montage          = _take_flag("--montage")

    if  do_html and do_montage :
        print("HTML or montage?")
        sys.exit(101)

    txt                 = _take_value("--text")


    if  txt is None :
        if  len(sys.argv) != 0 :
            fi          = open(sys.argv.pop(0), "r")
            try :
                txt     = fi.read()
            finally :
                fi.close()
            pass
        else :
            txt         = sys.stdin.read()
        pass

    if  do_montage and (len(sys.argv) != 1) :           # find out before spending time on the queries
        print("Tell me the montage file name!")
        sys.exit(101)


    if  short_cnt is None :
        urls = convert_text_to_google_image_urls(txt, safe, do_rand)
    else :
        urls = convert_short_text_to_google_image_urls(txt, short_cnt, safe, do_rand)    # --random counts with --short, too


    if  do_montage :

        #   Entries for dropped words have no image to fetch.
        io  = make_montage_image_from_urls([ u for u in urls if u[0] is not None ])
        if  io is None :
            print("No images found for the montage!")
            sys.exit(102)

        io.save(sys.argv.pop(0))

    else :

        fo  = sys.stdout
        if  len(sys.argv) != 0 :
            fo  = open(sys.argv.pop(0), "w")

        try :
            bi                      = 0
            if  real_images :   bi  = 4                 # --real_images: field 4 of the image info rather than field 0

            if  do_html :
                fo.write("<TABLE>\n")

            for u in urls :
                wrds    = u[1]
                if  isinstance(wrds, (list, tuple)) :   # a dropped word comes in a one-item list - show the word, not the list
                    wrds    = " ".join(wrds)

                if  do_html :
                    if  u[0] is None :
                        fo.write("<TR><TD>%s</TD></TR>\n"                          % ( wrds,           ))
                    else :
                        fo.write("<TR><TD>%s</TD><TD><img src=\"%s\"></TD></TR>\n" % ( wrds, u[0][bi]  ))
                    pass
                else :
                    if  u[0] is None :
                        fo.write("%-50s\n"      % ( wrds,           ))
                    else :
                        fo.write("%-50s %s\n"   % ( wrds, u[0][bi]  ))
                    pass
                pass

            if  do_html :
                fo.write("</TABLE>\n")
            pass
        finally :
            if  fo is not sys.stdout :   fo.close()
        pass

    pass


#
#
#
# eof
