#!/usr/bin/python

# tz_type_ahead.py
#       --copyright--                   Copyright 2013 (C) Tranzoa, Co. All rights reserved.    Warranty: You're free and on your own here. This code is not necessarily up-to-date or of public quality.
#       --url--                         http://www.tranzoa.net/tzpython/
#       --email--                       pycode is the name to send to. tranzoa.com is the place to send to.
#       --bodstamps--
#       October 9, 2013         bar
#       --eodstamps--
##      \file
#       \namespace              tzpython.tz_type_ahead
#
#
#       Manage logic that suggests what a person might want to type given something he's already typed.
#
#       This is word completion, phrase completion, text prediction, auto-complete, etc.
#
#


import  difflib
import  re
import  sys

import  tzlib


def get_word(s) :
    """
        Return the first whitespace-delimited word of the given string.

        Leading and trailing whitespace is ignored.
        An empty or all-whitespace string yields an empty string
        (the regex finds nothing to match in that case).

    """
    g   = re.search(r'^(\S+)', s.strip())
    if  not g :
        return('')
    return(g.group(1))


class   a_settings(object) :
    """
        Define an object that has all our settings.

        Every setting is read and written through a method of the same name:
        called without a value, the method returns the current setting;
        called with a value, it stores the value and returns the previous setting.

        The settings whose attribute names end in '_mult' are score multipliers.
        Their sum is cached and is refreshed whenever one of them is changed.

    """

    def __init__(me) :
        """ Set every setting to its default value. """

        me._min_longer      = 6.0

        me._max_first_match = 16.0
        me._max_length      = 48.0
        me._age_offset      = 3.0

        me._lc_mult         = 1.0
        me._lc_word_mult    = 2.0
        me._lc_first_mult   = 30.0
        me._age_mult        = 2.0
        me._fage_mult       = 0.9
        me._oage_mult       = 0.75
        me._length_mult     = 0.3

        #   Find the multipliers by attribute name (dir() gives them sorted by name).
        me._score_mult_settings = [ nm for nm in dir(me) if nm.startswith('_') and nm.endswith('_mult') ]
        me._set_score_mult_sum()


    def _get_set(me, nm, v = None, resum = False) :
        """
            Return the value attribute 'nm' had when we were called.
            If 'v' is not None, store it as the attribute's new value and,
            if 'resum' is true, refresh the cached sum of the multipliers.

        """
        ov  = getattr(me, nm)
        if  v is not None :
            setattr(me, nm, v)
            if  resum :
                me._set_score_mult_sum()
            pass
        return(ov)


    def _set_score_mult_sum(me) :
        """ Recompute the cached sum of all the '_mult' settings. """
        me._score_mult_sum      = sum([ getattr(me, nm) for nm in me._score_mult_settings ])


    #   Plain settings (not part of the multiplier sum).

    def min_longer(me, v    = None) :
        """ Get/set how much longer than the typed string a candidate must be. """
        return(me._get_set('_min_longer', v))

    def max_first_match(me, v   = None) :
        """ Get/set the cap on the count of matching leading characters. """
        return(me._get_set('_max_first_match', v))

    def max_length(me, v    = None) :
        """ Get/set the cap on the candidate length used in scoring. """
        return(me._get_set('_max_length', v))

    def age_offset(me, v    = None) :
        """ Get/set the value added to ages before they are divided. """
        return(me._get_set('_age_offset', v))


    #   Score multipliers (setting any of them refreshes the cached sum).

    def lc_mult(me, v = None)   :
        """ Get/set the multiplier for the whole-string similarity. """
        return(me._get_set('_lc_mult', v, True))

    def lc_word_mult(me, v      = None) :
        """ Get/set the multiplier for the first-word similarity. """
        return(me._get_set('_lc_word_mult', v, True))

    def lc_first_mult(me, v     = None) :
        """ Get/set the multiplier for the matching-leading-characters term. """
        return(me._get_set('_lc_first_mult', v, True))

    def age_mult(me, v      = None)  :
        """ Get/set the multiplier for the 'age' term. """
        return(me._get_set('_age_mult', v, True))

    def fage_mult(me, v     = None) :
        """ Get/set the multiplier for the 'fage' term. """
        return(me._get_set('_fage_mult', v, True))

    def oage_mult(me, v     = None) :
        """ Get/set the multiplier for the 'oage' term. """
        return(me._get_set('_oage_mult', v, True))

    def length_mult(me, v   = None) :
        """ Get/set the multiplier for the length term. """
        return(me._get_set('_length_mult', v, True))


    def score_mult_sum(me) :
        """ Return the (cached) sum of all our multipliers. """
        return(me._score_mult_sum)


    def parse_cmd_line(me, argv) :
        """
            Get our command line parameters from the given sys.argv-type list, destructively.

            For each multiplier setting, e.g. '_lc_mult', every "--lc_mult VALUE" pair
            is removed from 'argv' and VALUE is stored, as a float, in the setting.

            Raises IndexError if an option is the last item in 'argv' (no value follows it)
            and ValueError if the value is not a number.

        """
        for nm in me._score_mult_settings :
            pnm     = nm[1:]                                    # the option / method name is the attribute name without its leading underscore
            while True :
                oi  = tzlib.find_argi(argv, [ "--" + pnm ])     # presumably the index of the option in argv, negative if it is absent
                if  oi < 0 :
                    break
                del argv[oi]                                    # drop the option itself - its value slides in to index 'oi'
                getattr(me, pnm)(float(argv.pop(oi)))
            pass
        pass


    def __str__(me) :
        """ Return a string listing the multipliers, e.g. "agemult=2.0 fagemult=0.9 ...". """
        return(' '.join([ nm.replace('_', '') + '=%.1f' % getattr(me, nm) for nm in me._score_mult_settings ]))

    #   a_settings



def first_match(s1, s2) :
    """ Count the leading characters the two given strings have in common. """
    same    = 0
    for c1, c2 in zip(s1, s2) :     # zip stops at the end of the shorter string
        if  c1 != c2 :
            break
        same   += 1
    return(same)




class   a_matcher(str) :
    """
        Define a string that other strings can be matched against.
        These object calculate and store a similarity score.
        And they can remember their "age".
        "Age" is a monotonically increasing value.
        The object's age tells how recently the object
        has been chosen from among many such objects.

        There are 3 ages kept:
        .   The age when the object is chosen.
        .   The fage, an older, IIR filtered age value.
        .   The oage, an much older, IIR filtered age value.

    """

    def __new__(me, s, real_str = None, settings = None, age = 0) :
        """
            Create and return a new object,
            given a string and an optional "real" string
            the given string will be translated to.

        """
        ss          = s.strip()
        me          = str.__new__(me, ss)
        me.real_str = real_str or s
        me.lc       = ss.lower()
        me.word     = get_word(ss)
        me.lc_word  = get_word(me.lc)

        me.settings = settings or a_settings()

        age         = float(age or 0)
        me.age      = age                   # this is the age the object was chosen
        me.fage     = age
        me.oage     = age

        me._score   = 0.0

        return(me)


    def score(me, s = None, age = None, verbose = 0) :
        """
            Calculate and store the score for this matcher against an arbitrary stripped string.
            Return the score.

        """
        if  s   :
            age = max(me.age, age)
            age = float(age)
            lc  = s.lower()

            sm  = me.settings.lc_mult()       * difflib.SequenceMatcher(None, lc, me.lc     ).ratio()
            wm  = me.settings.lc_word_mult()  * difflib.SequenceMatcher(None, lc, me.lc_word).ratio()
            im  = me.settings.lc_first_mult() * (min(me.settings._max_first_match, first_match(lc, me.lc)) / me.settings._max_first_match)
            am  = me.settings.age_mult()      * ((me.age   + me.settings._age_offset) / (age + me.settings._age_offset))
            fm  = me.settings.fage_mult()     * ((me.fage  + me.settings._age_offset) / (age + me.settings._age_offset))
            om  = me.settings.oage_mult()     * ((me.oage  + me.settings._age_offset) / (age + me.settings._age_offset))
            lm  = me.settings.length_mult()   * min(len(me), me.settings._max_length) / me.settings._max_length

            me._score   = (sm + wm + im + am + fm + om + lm)          / me.settings.score_mult_sum()

            if  verbose :
                print      sm,  wm,  im,  am,  fm,  om,  lm, me._score, me.settings.score_mult_sum()
            pass

        return(me._score)


    def choose(me, age) :
        """ This string was selected. Keep its age up to date. """
        me.age      =       age
        me.fage     = ((me.fage *  7.0) + age) /  8.0
        me.oage     = ((me.oage * 31.0) + age) / 32.0


    #   a_matcher




def learn(ma, sa, ra = None, settings = None, known = None) :
    """
        Given an array of a_matcher's and an array of strings input by a user,
        update the array of a_matcher's with the strings input by the user.

        If 'ra' is given, it's an array of strings parallel with 'sa' giving
        the "real" strings that should be substituted at choose time for matched input.
        'ra' elements can be None or empty strings to take their respective 'sa' values.
        'ra' itself is not changed.

        'settings' is handed to each new a_matcher.

        'known' is a dictionary, keyed by stripped input string, of the matchers already learned.
        If given (even if empty), it is updated in place so it can be handed to later calls.

        Blank strings and strings starting with ';' or '#' are ignored.

        Return the updated matcher array
        (which is 'ma' itself, updated in place, unless 'ma' is None).

        """
    if  ma is None :
        ma      = []

    ra      = list(ra or [])                            # copy so that padding it does not change the caller's list
    ra     += ([ None ] * (len(sa) - len(ra)))

    if  known is None :                                 # 'known or {}' would throw away an empty dictionary the caller wants filled
        known   = {}

    for si, s in enumerate(sa) :
        s   = s.strip()
        if  (not s) or (s[0] in [ ';', '#']) :
            continue

        if  s in known  :
            known[s].choose(len(ma))                    # note the string has been chosen more recently
            continue

        rs          = ra[si] or s                       # learn the input string as a matcher
        if  s.find("<-") >= 0 :
            #   NOTE(review): the "real" string of a "<-" line is a list: [ first part, remaining parts joined with spaces ]. Confirm that's what is wanted.
            rs      = [ ss.strip() for ss in s.split("<-") if ss.strip() ]
            if  len(rs) > 1 :
                rs[1:]  = [ " ".join(rs[1:]) ]          # a list, so the joined string is one element, not one element per character

        m           = a_matcher(s, rs, age = len(ma), settings = settings)
        known[s]    = m

        ma.append(m)

    return(ma)



def print_choice(s, m, age) :
    print "%60s %10.5f %5.1f %5.1f" % ( m.real_str, m.score(), m.fage / age, m.oage / age, ),


def find_str(sa, s, age = None) :
    if  len(sa) :
        s   = s.strip()
        age = age + 1.0

        # fa  = difflib.get_close_matches(s, [ m.lc      for m in sa ] , len(sa), .2)
        # wa  = difflib.get_close_matches(s, [ m.lc_word for m in sa ] , len(sa), .2)
        ml  = int(len(s) + sa[0].settings.min_longer())
        ra  = [ ss for ss in sa if len(ss) >= ml ]
        for m in ra :
            m.score(s, age)

        if  len(ra) :
            ra.sort(lambda a, b : cmp(b.score(), a.score()) or cmp(len(b), len(a)) or cmp(a, b))

            m   = ra[0]
            m.score(s, age, verbose = 100)
            m.choose(age)

            print "%4u %4u %-60s" % ( len(sa), len(ra), s, ),
            print_choice(s, m, age)

            if  False :
                if  len(ra) > 1 :
                    m   = ra[min(1, len(ra) - 1)]
                    print_choice(s, m, age)
                pass
            elif True :
                for i in xrange(min(20, len(ra))) :
                    print
                    print "    %i" % i,
                    print_choice(s, ra[i], age)
                pass

            print

        pass
    pass


#   The usage text printed for --help or when no arguments are given.
#   The '%s' is filled in with the base name of the program.
#   NOTE(review): the text has typos ("histor_file", "userr-input", "liklihood"),
#   doesn't list the "--..._mult VALUE" options a_settings.parse_cmd_line() takes,
#   and says two choices are printed though find_str() prints up to 20.
help_str    = """
%s (options) histor_file user_string...

    Suggest type-ahead strings.

Options:

    --verbose                       Increase the verbosity level.

This program inputs a file containing text input history in multi-line string form
and strings a person has typed.

For each userr-input string, it prints the 1st two choices of
type-ahead strings in liklihood order of being what the user
would want to type in place of the string.

"""



if  __name__ == '__main__' :

    import  os

    import  TZCommandLineAtFile


    program_name    = sys.argv.pop(0)

    TZCommandLineAtFile.expand_at_sign_command_line_files(sys.argv)


    verbose         = 0

    while True :
        oi  = tzlib.find_argi(sys.argv, [ "--help", "-h", "-?", "/h", "/H", "/?" ] )
        if  oi < 0  :   break
        del sys.argv[oi]
        print help_str % ( os.path.basename(program_name), )
        sys.exit(254)

    settings    = a_settings()
    settings.parse_cmd_line(sys.argv)

    while True :
        oi  = tzlib.find_argi(sys.argv, [ "--verbose", "-v", ] )
        if  oi < 0  :   break
        del sys.argv[oi]
        verbose    += 1


    if  not len(sys.argv) :
        print help_str % ( os.path.basename(program_name), )
        sys.exit(101)
    hfile   = sys.argv.pop(0)


    if  not len(sys.argv) :
        print >>sys.stderr, "Please tell me what a person has input so far"
        sys.exit(102)


    fd      = tzlib.safe_read_whole_text_file(hfile)
    if  not fd :
        print >>sys.stderr, "Empty file, %s. Please tell me what a person has input so far" % hfile
        sys.exit(103)

    known   = {}
    ma      = learn([], fd.split('\n'), known = known)
    if  not len(ma) :
        print >>sys.stderr, "I learned nothing from %s!" % hfile
        sys.exit(104)

    while len(sys.argv) :
        s   = sys.argv.pop(0)
        find_str(ma, s, age = len(ma))
        learn(ma, s, known = known)
        print
    pass


# eof
