#!/usr/bin/python

# tzlib.py
#       --copyright--                   Copyright 2007 (C) Tranzoa, Co. All rights reserved.    Warranty: You're free and on your own here. This code is not necessarily up-to-date or of public quality.
#       --url--                         http://www.tranzoa.net/tzpython/
#       --email--                       pycode is the name to send to. tranzoa.com is the place to send to.
#       --bodstamps--
#       July 22, 2003           bar
#       October 7, 2003         bar     ambiguous_file_list
#       January 31, 2004        bar     find_in_array
#       September 28, 2004      bar     define False and True
#       October 25, 2004        bar     hash sort routines (built in already?)
#       January 12, 2005        bar     array_find
#       January 13, 2005        bar     file_nameable and ascii and crc32 routines moved to here
#       January 17, 2005        bar     add <>+'" to the characters changed by file_nameable
#                                       fix __ALL__'s file_name_able name
#       January 30, 2005        bar     _ascii() call -> ascii() - and make it right and import re
#       February 9, 2005        bar     tz_vector_cosine
#       February 17, 2005       bar     html character entities
#       February 25, 2005       bar     safe_html
#       March 1, 2005           bar     option to translate &nbsp; to space or whatever
#       March 19, 2005          bar     typo in ascii - StringType's
#       June 27, 2005           bar     collapse array_find in to find_in_array
#                                       without_dupes
#       July 12, 2005           bar     check crc
#       August 24, 2005         bar     more fooling with the new python warnings about 32 bit ints in crc logic
#       September 5, 2005       bar     look up any of an array of strings in find_in_array / array_find
#       March 25, 2006          bar     try to avoid a latin1 to ascii fuss
#       April 26, 2006          bar     strrev
#       June 14, 2006           bar     string_pairs and flat_positional_strings
#       June 16, 2006           bar     de_html_str
#       June 21, 2006           bar     pull scripts out in de_html_str
#       May 5, 2007             bar     linear_regression
#       May 16, 2007            bar     zlib crc
#       May 17, 2007            bar     printable
#       June 22, 2007           bar     comment
#       July 1, 2007            bar     longitudinal parity (xor sum)
#       October 12, 2007        bar     distance / direction routines (here so I don't need to look 'em up again)
#       November 18, 2007       bar     turn on doxygen
#       November 20, 2007       bar     move lf_only() and no_blank_lines() from strip_files.py
#                                       lf_only() fixes \r\r\n to be one \n
#                                       as does safe_html() make \r\r\n one <BR>
#                                       c_string()
#                                       read_whole_text_file() (finally)
#                                       read_whole_binary_file() (ditto)
#       November 27, 2007       bar     lf_only_with_no_trailing_white_space()
#       November 27, 2007       bar     insert boilerplate copyright
#       December 1, 2007        bar     elapsed_time()
#       December 14, 2007       bar     write the file in write_whole_...
#       December 15, 2007       bar     multiline_strip
#       December 21, 2007       bar     maybe_wrap_with_cdata
#       January 1, 2008         bar     s_except_1()
#       January 18, 2008        bar     unicode_byte_string()
#                                       and auto-convert unicode to utf-16 or utf-8 when writing whole files
#       January 20, 2008        bar     same_object() (for doc purposes)
#       January 21, 2008        bar     s_except_1 takes lists and dictionarys
#       January 29, 2008        bar     sys_err_file_line()
#       February 8, 2008        bar     finally, a binary_search() i can remember
#                                       fix blkcrc32() under python 2.2 (zlib's crc isn't right, apparently)
#                                       add start/end indices to find_in_array and array_find
#       March 9, 2008           bar     temp_file_name
#       March 12, 2008          bar     allow arrays to de_html_str()
#       March 13, 2008          bar     comment
#       May 17, 2008            bar     email adr
#       August 13, 2008         bar     make_dictionary works for strings
#       August 18, 2008         bar     use basestring
#       August 29, 2008         bar     basestring instead of StringType because of unicode strings and others
#       --eodstamps--
##      \file
#
#
#       Buncha things.
#
#

import  glob
import  htmlentitydefs
import  math
import  os.path
import  random
import  re
import  string
import  sys
import  time
import  traceback
import  zlib
from    types                   import ListType, TupleType, UnicodeType, DictionaryType


##  Run under older Pythons
##  Referencing True and False raises NameError on an interpreter that does not
##  define them, in which case integer stand-ins are created at module level.
try:
    True, False
except NameError:
    True    = 1
    False   = 0



def ambiguous_file_list(ambiguous_name, do_sub_dirs = False) :
    """
        Return an array with the names of the files that match the given ambiguous (wildcard) file name.

        'ambiguous_name' is a glob pattern, optionally prefixed with a directory.
        If 'do_sub_dirs' is true, the file-name part of the pattern is also applied,
        recursively, inside every sub-directory of the pattern's directory.

        Matches from the pattern's own directory come first, followed by the matches
        from each sub-directory in os.listdir() order.
    """

    files = glob.glob(ambiguous_name)

    if  do_sub_dirs :
        (dir_name, amb_name) = os.path.split(ambiguous_name)
        if  not len(dir_name) :
            dir_name         = "./"

        norm_dir             = os.path.normpath(dir_name)       # the same for every directory entry

        for fn in os.listdir(dir_name) :
            ffn = os.path.join(norm_dir, fn)
            if  os.path.isdir(ffn) :
                files.extend(ambiguous_file_list(os.path.join(ffn, amb_name), do_sub_dirs))
            pass
        pass

    return(files)



def binary_search(a, item, cmp_rtn = None, si = None, ei = None, cmp_obj = None) :
    """
        Binary search the sorted array, 'a', looking for 'item' or the first index in the array that has a value greater than 'item'.

        'cmp_rtn', if given, is called as cmp_rtn(a, index, item, cmp_obj) and must return
        a negative number when a[index] sorts below 'item' (see direct_cmp(), below, for the default).
        'si' and 'ei' limit the search to a[si:ei]. They default to the whole array.
        'cmp_obj' is passed, untouched, to 'cmp_rtn'.

        Return the found index (or 'ei' / len(a) if all the searched items are below 'item').
    """


    def direct_cmp(a, i, item, cmp_obj) :
        # three-way compare spelled out so that the cmp() built-in is not needed
        return((a[i] > item) - (a[i] < item))


    si      = si or 0
    if  ei is None  :
        ei  = len(a)

    if  cmp_rtn is None :
        cmp_rtn = direct_cmp

    lo      = si
    hi      = ei
    mid     = (lo + hi) // 2                # the answer if the loop never runs. // keeps the index an integer
    while lo < hi :
        mid = (lo + hi) // 2

        if  cmp_rtn(a, mid, item, cmp_obj) < 0 :
            mid    += 1
            lo      = mid
        else :
            hi      = mid
        pass

    # let the caller do mid=max(0, min(len(a), mid)) if he wants

    return(mid)





def find_in_array(a, s, bi = None, ei = None) :
    """
        Find an item in an array, 'a' - or the first of an array of items, 's', in the array.

        A non-list/tuple 'a' or 's' is treated as a one-item array.
        'bi' and 'ei' limit the search to a[bi:ei]. They default to the whole array.
        The items in 's' are tried in order. The first one that is found wins.

        Return -1 if not found.
        Otherwise return the found index, which is always an index in to 'a' itself.
    """

    if  not isinstance(a, (list, tuple)) :
        a = [ a ]

    if  not isinstance(s, (list, tuple)) :
        s = [ s ]

    bi  = bi or 0
    if  ei is None :
        ei  = len(a)

    for ss in s :
        try :
            try :
                return(a.index(ss, bi, ei))

            except (TypeError, AttributeError) :
                # An index() that takes no start/end arguments (python 2.2 or whatever),
                # or a tuple that has no index() at all (before python 2.6).
                # Search the slice and convert the slice-relative hit back to an index in 'a'.
                start = bi
                if  start < 0 :
                    start = max(0, len(a) + start)
                return(list(a[bi:ei]).index(ss) + start)
            pass
        except (IndexError, ValueError) :
            pass
        pass

    return(-1)



def make_dictionary(a, val = True) :
    """
        Make a dictionary from a list/tuple/string.

        Each item of 'a' (each character, if 'a' is a string) becomes a key whose value is 'val'.
        Any other kind of 'a' becomes the single key of the returned dictionary.
        None yields an empty dictionary.
    """

    if  a is None :                             # identity test - never runs the object's own comparison logic
        return({})

    if  not isinstance(a, (list, tuple, basestring)) :
        a = [ a ]

    return(dict.fromkeys(a, val))



def without_dupes(a) :
    """
        Return the distinct items of the given array.
        The items come back as dictionary keys, so their order will probably be changed.
    """

    unique = make_dictionary(a)

    return(unique.keys())



def keys_sorted_by_values_keys(hash_dict) :
    """
        Return an array of the keys from a dictionary, sorted by value/key.

        Keys are ordered by their values. Keys with equal values are ordered by the keys themselves.
    """

    # decorate / sort / undecorate: tuples compare by value first, then by key
    pairs = [ ( hash_dict[k], k ) for k in hash_dict.keys() ]
    pairs.sort()

    return([ k for ( v, k ) in pairs ])


def values_sorted_by_values_keys(hash_dict) :
    """
        Return an array of the dictionary's values, in the order keys_sorted_by_values_keys() puts their keys.
    """

    return([ hash_dict[k] for k in keys_sorted_by_values_keys(hash_dict) ])



def keys_sorted_by_keys_values(hash_dict) :
    """
        Return an array of the keys from a dictionary, sorted by key/value.

        Keys are ordered by themselves. Keys that compare equal are ordered by their values.
    """

    # decorate / sort / undecorate: tuples compare by key first, then by value
    pairs = [ ( k, hash_dict[k] ) for k in hash_dict.keys() ]
    pairs.sort()

    return([ k for ( k, v ) in pairs ])


def values_sorted_by_keys_values(hash_dict) :
    """
        Return an array of the dictionary's values, in the order keys_sorted_by_keys_values() puts their keys.
    """

    return([ hash_dict[k] for k in keys_sorted_by_keys_values(hash_dict) ])


def array_find(a, s, si = None, ei = None) :
    """
        Alias of find_in_array(): return the index of 's' in 'a' (searching a[si:ei]), or -1 if it's not there.
    """

    found_at = find_in_array(a, s, si, ei)

    return(found_at)



def strrev(s) :
    """
        Reverse a string.

        The items of 's' are joined, last to first, in to one string.
        For a plain string this is the same as s[::-1].
    """

    a = list(s)
    a.reverse()
    return("".join(a))                      # the string method - not the old string.join() module function


def c_string(s) :
    """
        Return the string with backslashes, quotes and the usual control characters
        written as their C string escape sequences.

        Only the characters in the table below are converted.
    """

    escapes = (
                ( "\\", r"\\" ),            # first, so the backslashes added below are not doubled
                ( "\'", r"\'" ),
                ( "\"", r"\"" ),
                ( "\a", r"\a" ),
                ( "\b", r"\b" ),
                ( "\f", r"\f" ),
                ( "\n", r"\n" ),
                ( "\r", r"\r" ),
                ( "\t", r"\t" ),
                ( "\v", r"\v" ),
              )

    for ( raw, escaped ) in escapes :
        s = s.replace(raw, escaped)

    return(s)



def unicode_byte_string(s) :
    """
        Return a Unicode string as a UTF-16 encoded byte string.

        Note: the encoder puts a 2-byte byte-order mark at the start of the returned string.
    """

    encoded = s.encode('utf-16')

    return(encoded)



def ascii(s, ac = '_') :
    """
        Return the given string after converting all characters over 127 to _ or the given character, 'ac'.

        If 's' is not a string, str(s) is converted.
        An 'ac' of None means '_'.
    """

    if  ac is None :
        ac  = '_'

    if  not isinstance(s, basestring) :
        s = str(s)

    chars = []                              # ('as' can't be a variable name - it's a reserved word from python 2.6 on)
    for c in s :
        if  ord(c) >= 128 :
            c = ac

        chars.append(c)

    return("".join(chars))



def file_name_able(fn) :
    """
        Return the given file name in a form that is safe to use as a file name.

        Unicode names are encoded to ASCII and the result is run through ascii().
        Double quotes are removed, the other troublesome characters become underscores,
        and white space is stripped from both ends.
    """

    if  isinstance(fn, UnicodeType) :
        fn  = fn.encode('ascii', 'replace')

    cleaned = ascii(fn)

    for ( pattern, replacement ) in ( ( r"[\"]", "" ), ( r"[\\\/\:\&\^\*\?\<\>\"\'\+]", "_" ) ) :
        cleaned = re.sub(pattern, replacement, cleaned)

    return(cleaned.strip())



printable_re    =   re.compile(r"[^" + re.escape(string.printable) + r"]")
def printable(s, tochr = "_") :
    """
        Return the string with each character that is not in string.printable replaced by 'tochr'.
        An empty or None 'tochr' means an underscore.
    """

    if  not tochr :
        tochr   = "_"

    return(printable_re.sub(tochr, s))



def lf_only(fs) :
    """ Return the multi-line string with every CR...CR LF run and every remaining lone CR turned in to a single LF. """

    without_crlf    = re.sub(r"\r+\n", "\n", fs)

    return(re.sub(r"\r",    "\n", without_crlf))




##  Get rid of leading line feeds (amounts to s.lstrip("\n") - only the start of the string is touched)
strip_first_lines_re            =   re.compile(r"^\n+",                 re.DOTALL)

##  Get rid of white-space that is at the ends of text lines inside a string.
##  Note that \s matches line feeds, too, so a match is not limited to spaces and tabs.
strip_eol_spaces_re             =   re.compile(r"\s+$",                 re.MULTILINE)


def lf_only_with_no_trailing_white_space(fs) :
    """ Return the string run through lf_only(), with strip_eol_spaces_re matches removed and one LF appended. """

    return(strip_eol_spaces_re.sub("", lf_only(fs)) + "\n")



def no_blank_lines(fs) :
    """
        Return the multi-line string without its blank or white-space-only lines.

        Line breaks are normalized with lf_only() and white space at the ends of
        the text lines is removed along the way.
        An LF is appended before the blank lines are squeezed out.
    """

    text    = strip_eol_spaces_re.sub("", lf_only(fs)) + "\n"
    text    = re.sub("\n(\s*\n)+", "\n", text)

    return(strip_first_lines_re.sub("", text))



def multiline_strip(fs, chrs = None) :
    """
        Do a strip on each text line of a string with multiple text lines.

        'chrs' is handed to strip(): the characters to strip, or None for white space.
        CRLF, LF and CR all are considered to be an EOL. The lines are re-joined with LF.
        If no EOL is at the end of 'fs', there will be none at the end of the return value.
    """

    return("\n".join( [ s.strip(chrs) for s in re.split(r"\r?\n|\r", fs) ] ))





#   16-entry lookup table for crc32(), below, which indexes it with a 4-bit value.
#   The last eight entries are written as negative decimal numbers.
#   The comment next to each one gives the same 32 bits in hex.
_crc32_table = (
        0x4dbdf21c,
        0x500ae278,
        0x76d3d2d4,
        0x6b64c2b0,
        0x3b61b38c,
        0x26d6a3e8,
        0x000f9344,
        0x1db88320,
        -1610256068,        # 0xa005713c,
        -1112383144,        # 0xbdb26158,
        -1687465484,        # 0x9b6b51f4,
        -2032385648,        # 0x86dc4190,
         -690409300,        # 0xd6d930ac,
         -881975096,        # 0xcb6e20c8,
         -306769820,        # 0xedb71064,
         -268435456,        # 0xf0000000
        )

#   Starting CRC value used by this file's self-test.
_TEST_CRC_VALUE     = -1737075662           # 0x98765432

#   Mask that crc32() ANDs its results with.
#   The long value is converted to an int if the interpreter allows it.
#   If int() raises OverflowError, -1 is used instead
#   (presumably because -1 has all bits set in a 32-bit int - confirm).
_CRC32_MASK         = 0xFFFFffffl
try :
    _CRC32_MASK     = int(_CRC32_MASK)
except OverflowError :
    _CRC32_MASK     = -1





#
#       Compute a 32-bit (PKZIP) crc of a string or array.
#
#       REMEMBER! The value could easily be construed to be a signed int. Odd things might happen in future versions of python.
#       To be "correct" about it, the value should be massaged like "crc = long(crc) & 0xFFFFffffL", yielding a long value.
#
#


def crc32(current_crc, c) :
    """
        Return 'current_crc' updated with the byte value, 'c'.

        The byte is handled as two table look-ups: one using 'c' as it is, then one using 'c' shifted right 4 bits.
    """

    c   = int(c)

    for nibble in ( c, c >> 4 ) :
        current_crc = (((current_crc >> 4) & 0x0FFFffff) ^ _crc32_table[(current_crc ^ nibble) & 0xf]) & _CRC32_MASK

    return(current_crc)


def pure_python_crc32(current_crc, xmem) :
    """ Return 'current_crc' updated, one character at a time through crc32(), with every character of 'xmem'. """

    crc     = current_crc
    for offset in range(len(xmem)) :
        crc = crc32(crc, ord(xmem[offset:offset + 1]))

    return(crc)



def blkcrc32(current_crc, xmem) :
    """
        Return 'current_crc' updated with all of 'xmem'.

        When sys.version compares below "2.4" the work is done by pure_python_crc32() and its value is returned as is.
        Otherwise zlib.crc32() does the work and the result is returned as a long, masked to 32 bits.
    """

    if  sys.version < "2.4" :
        return(pure_python_crc32(current_crc, xmem))                # maybe 2.3 is ok. I don't know. But 2.4 is ok and 2.2 is not.

    #   A starting value with bit 31 set is turned in to the negative number
    #   that has the same low 32 bits before it is handed to zlib as an int.
    current_crc = long(current_crc)
    if  current_crc & 0x80000000L :
        current_crc = (~current_crc + 1) & 0xFFFFffffL
        current_crc = -current_crc

    return(long(zlib.crc32(xmem, int(current_crc))) & 0xFFFFffffL)





def xorsum(s) :
    """
        Return the XOR of the character codes of a string (its longitudinal parity).
        An empty string yields zero.
    """

    parity  = 0
    for code in map(ord, s) :
        parity  = parity ^ code

    return(parity)









#
#
#   Find the "cosine" of the angle between two vectors.
#
#       If either vector is None, then the return value is None.
#       If all elements of either of the arrays are zero (or None), then the return value is None.
#
#       Undef'd values in the arrays are ignored - that "dimension" is ignored, that is.
#
#       cosine = tz_vector_cosine( [ 1, 2, 3 ], [ 4, -5, 6 ] )
#
#       If the vectors point in the same direction, then the return value is 1.
#       If the vectors point in exactly opposite directions, then the return value is -1.
#       If the vectors are exactly at "right angles", then the return value is 0.
#
#
def tz_vector_cosine(v1, v2) :
    """
        Return the hyperspace cosine of two vectors.

        Only the dimensions that both vectors have are used.
        A dimension in which either vector has None is ignored.

        Return None if either vector is None or if either vector's usable elements are all zero.
    """

    if  (v1 is None) or (v2 is None) :      # identity test, so sequences that overload == are ok
        return(None)

    dot      = 0.0
    ss1      = 0.0
    ss2      = 0.0

    for i in range(0, min(len(v1), len(v2))) :

        i1   = v1[i]
        if  i1 is None :    continue

        i2   = v2[i]
        if  i2 is None :    continue

        i1   = float(i1)
        i2   = float(i2)

        dot += (i1 * i2)
        ss1 += (i1 * i1)
        ss2 += (i2 * i2)

    if  (ss1 == 0.0) or (ss2 == 0.0) :
        return(None)

    return(dot / math.sqrt(ss1 * ss2))




#
#
#       Decode HTML character entities in a string.
#
#
html_entity_re  = re.compile(r"\&[^;\r\n\s]+;", re.DOTALL)

def decode_html_entities(s, nbsp_chr = None) :
    """
        Decode any HTML character entities in the string to their character counterparts.

        Named entities (&amp;), decimal entities (&#65;) and hex entities (&#x41;) are decoded.
        'nbsp_chr' is what &nbsp; is changed to. It defaults to the non-break space character itself.

        Anything that looks like an entity but is not a known name, or whose number
        cannot be made in to a character, is left in the string exactly as it was.
    """

    if  nbsp_chr is None :
        nbsp_chr  = htmlentitydefs.entitydefs['nbsp']

    def _number_2_chr(digits) :
        """ Convert the digits of a numeric entity ('65' or 'x41') to a character. """
        if  digits[0:1] in ( 'x', 'X' ) :
            return(chr(int(digits[1:], 16)))
        return(chr(int(digits)))

    def _entity_2_chr(g) :
        whole   = g.group(0)
        name    = whole[1:-1]
        try :
            if  name[0:1] == '#' :                              # handles normal int values from the passed string
                return(_number_2_chr(name[1:]))

            if  name not in htmlentitydefs.entitydefs :
                return(whole)                                   # not an entity we know. leave it alone

            if  name == 'nbsp' :
                w = nbsp_chr
            else :
                w = htmlentitydefs.entitydefs[name]

            if  (w[0:2] == '&#') and (w[-1:] == ';') :          # handles ints from htmlentitydefs
                w = _number_2_chr(w[2:-1])

            return(w)

        except (ValueError, OverflowError) :                    # not a number, or no such character
            return(whole)
        pass


    return(html_entity_re.sub(_entity_2_chr, s))




def safe_html(txt) :
    """
        Return the given string as text that an HTML page will show as plain text.

        The &, < and > characters become entities, line breaks become <BR>,
        runs of six or more dashes become <HR>, and every character outside of
        space-to-tilde becomes a decimal character entity.
    """

    for ( raw, entity ) in ( ( "&", "&amp;" ), ( "<", "&lt;" ), ( ">", "&gt;" ) ) :
        txt = txt.replace(raw, entity)

    txt = lf_only(txt)
    txt = re.sub(r"[\r\n]", "<BR>", txt)
    txt = re.sub(r"-{6,}", "<HR>", txt)

    # escape all characters outside of space-to-tilde
    return(re.sub(r"[^ -\176]", lambda m : "&#" + str(ord(m.group(0))) + ";", txt))



cdata_string_re = re.compile(r"^<\!\[CDATA\[.*\]\]>$", re.DOTALL)      # DOTALL so wrapped text that has line breaks in it is recognized

def maybe_wrap_with_cdata(s) :
    """
        If needed, and if it's not already wrapped with a CDATA tag, wrap this string with <![CDATA[...]]>

        Wrapping is needed when safe_html() would change the string.
        Any "]]>" inside the string is rewritten before the string is wrapped.
    """

    if  not cdata_string_re.match(s) :
        if  safe_html(s) != s :
            # NOTE(review): character entities are not decoded inside a CDATA section, so a reader
            #               gets this replacement text literally. Confirm that is what consumers expect.
            s   = "<![CDATA[%s]]>"  % ( s.replace("]]>", "&#93;&#93;&gt;") )
        pass

    return(s)





##  Patterns used by de_html_str(), below.

##  Opening or closing br and p tags, and hr tags, each with up to 200 characters before the '>'.
de_br_re        = re.compile(r"</?br\b[^>]{0,200}>",                    re.DOTALL + re.IGNORECASE)
de_p_re         = re.compile(r"</?p\b[^>]{0,200}>",                     re.DOTALL + re.IGNORECASE)
de_hr_re        = re.compile(r"<hr\b[^>]{0,200}>",                      re.DOTALL + re.IGNORECASE)
##  The start ('<' or '</' and the tag name) of table and list tags. The name, with any slash, is captured.
de_lfs_re       = re.compile(r"<(/?(?:table|tr|dir|li|ol|ul|dt|dl))\b", re.DOTALL + re.IGNORECASE)

##  A tab, and a run of spaces.
de_tab_re       = re.compile(r"\t",                                     re.DOTALL + re.IGNORECASE)
de_space_re     = re.compile(r" +",                                     re.DOTALL + re.IGNORECASE)

##  Three or more line feeds (spaces allowed after each), spaces after a line feed,
##  spaces before a line feed, and a run of line feeds.
de_lf_re        = re.compile(r"\n *\n *(?:\n *)+",                      re.DOTALL + re.IGNORECASE)
de_lfsp_re      = re.compile(r"\n +",                                   re.DOTALL + re.IGNORECASE)
de_splf_re      = re.compile(r" +\n",                                   re.MULTILINE)
de_multlf_re    = re.compile(r"\n+",                                    re.DOTALL + re.IGNORECASE)

##  A whole script element, from its opening tag through its closing tag.
de_script_re    = re.compile(r"<script\b[^>]*>.*?</script\b[^>]*>",     re.DOTALL + re.IGNORECASE)

##  Any tag at all.
de_html_re      = re.compile(r"<[^>]+>",                                re.DOTALL + re.IGNORECASE)


def de_html_str(s) :
    """
        Cheaply convert a string that contains HTML in to a text string with no HTML markup,
        with the text laid out kind of like the HTML would look if rendered as text
        (the sort of thing lynx could do from the command line).

        A list or tuple of strings is joined with line feeds first.
    """

    def _squeeze_spaces(txt) :
        """ Tabs become spaces and runs of spaces become one space. """
        return(de_space_re.sub(" ", de_tab_re.sub(" ", txt)))

    def _tidy_line_feeds(txt) :
        """ Limit blank-line runs and drop the spaces next to line feeds. """
        txt = de_lf_re.sub("\n\n", txt)
        txt = de_lfsp_re.sub("\n", txt)
        return(de_splf_re.sub("\n", txt))

    if  isinstance(s, (ListType, TupleType)) :
        s       = "\n".join(s)

    #   The white space in the HTML source means nothing. Squeeze it down.
    s = _squeeze_spaces(s)
    s = s.replace("\r\n", "\n").replace("\r",   "\n")
    s = _tidy_line_feeds(s)
    s = de_multlf_re.sub("\n", s)

    #   Tags that imply line breaks put line feeds in to the text.
    s = de_br_re.sub("\n", s)
    s = de_p_re.sub("\n\n", s)
    s = de_hr_re.sub("\n----------------------------------------\n", s)
    s = de_lfs_re.sub(r"\n<\1", s)

    s = de_script_re.sub("", s)
    s = de_html_re.sub("", s)                   # remove all HTML tags

    s = decode_html_entities(s, nbsp_chr = ' ')

    #   High-bit and odd characters go to plain look-alikes.
    look_alikes = (
                    ( "\x93", '"'  ),           # left  double quote
                    ( "\x91", '`'  ),           # left  single quote
                    ( "\x94", '"'  ),           # right double quote
                    ( "\x92", "'"  ),           # right single quote
                    ( "\x9c", "oe" ),
                    ( "\x96", "-"  ),           # n-dash
                    ( "\x8b", "<"  ),
                    ( "\x9b", ">"  ),
                    ( "\x8c", "OE" ),
                    ( "\xa0", ' '  ),           # non-break space
                    ( "\x7f", ' '  ),           # rubout
                    ( "\xad", "-"  ),           # soft hyphen
                    ( "\xaf", "-"  ),           # macron mark
                    ( "\xab", "<<" ),
                    ( "\xbb", ">>" ),
                  )
    for ( odd, plain ) in look_alikes :
        s = s.replace(odd, plain)

    s = _squeeze_spaces(s)

    return(_tidy_line_feeds(s))




def string_pairs(a, skip = 1, connector_str = "\x80_Pr_") :
    """
        Return an array of strings composed of each sequential pair in the given array of strings.

        Each returned string is a[i] + connector_str + a[i + skip].
        'skip' is how far apart the two strings of a pair are in 'a'.
        An empty array comes back if 'a' has no more than 'skip' strings in it.
    """

    return([ a[i] + connector_str + a[i + skip] for i in range(len(a) - skip) ])




def flat_positional_strings(strings, buckets = 3, ident_str = "\x80_Pf%u_%s") :
    """
        Given an array of strings, return an array of the strings with their positions in the array concatenated with them.
        For each string, there are two positions so that equal strings in similar positions in two arrays will yield at least one equal position string.
        The 'buckets' refers to how many sections of the array are used. Each identical string inside a particular bucket/section will yield at least one positional string that's the same.
            The first positional  string will identify the bucket the string is in.
            The second positional string will identify which pair of buckets the string is in and nearest.
        The 'ident_str' must contain a %u and a %s in that order.
            The %u will be filled in with the bucket or half-bucket number.
            The %s will be filled in with the string.

        The returned array has two entries for each input string. An empty input returns an empty array.
    """

    if  len(strings) == 0 :
        return( [] )


    # two output slots for each input string - each of the two passes, below, fills one of them
    pstrs   = [ 0 ] * (len(strings) * 2)


    def _fill(pi, ni, ne, n) :
        # Tag strings[ni:ne] (clipped to the array) with the number 'n', storing them from pstrs[pi] on.
        # Return the next free output index and the input index the next call should start at.
        # print pi, ni, ne, n
        for i in xrange(ni, min(ne, len(strings))) :
            pstrs[pi]   = ident_str % ( n, strings[i] )
            pi         += 1

        return( ( pi, ne ) )

    # 'd' is the count of strings in one bucket (the division is python 2 integer division).
    # NOTE(review): min() caps 'd' at 2 no matter what 'buckets' is. max() may have been intended - confirm.
    d   = min((len(strings) + buckets - 1) / buckets, 2)
    d2  = (d + 1) / 2                   # the first pass starts with a half-size (rounded up) group
    i   = pi = 0
    ni  = 0
    # first pass: a half-size group, then full-size groups, numbered from 0
    while ni < len(strings) :
        (pi, ni)    = _fill(pi, ni, ni + d2,  i)
        d2          = d
        i          += 1

    # second pass: full-size groups from the start of the array ('d2' equals 'd' by now). The numbering carries on.
    ni  = 0
    while ni < len(strings) :
        (pi, ni)    = _fill(pi, ni, ni + d2,  i)
        i          += 1

    return(pstrs)



def log_positional_strings(strings, log_mult = 2.0, ident_str = "\x80_Pl_%u_%s") :
    """
        Given an array of strings, return an array of the strings, each tagged with a number made from its position in the array.
        The number is the integer part of 'log_mult' times the natural log of the string's one-based position.
        The 'ident_str' must contain a %u and a %s in that order.
            The %u will be filled in with that number.
            The %s will be filled in with the string.
    """

    sa  = []
    for pos in range(len(strings)) :
        bucket  = int(math.log(pos + 1) * log_mult)
        sa.append(ident_str % ( bucket, strings[pos] ))

    return(sa)





def linear_regression(ax = None, ay = []):
    """
        Given an array of ax and ay values,
        return the values, ( m, a ), that make a linear regression line,
        (m * ax[i]) + a = ay[i]

        If 'ax' is None or empty, the X values are 0, 1, 2...
        If 'ax' is a number, the X values are ax, ax + 1, ax + 2...

        Raises ValueError if the arrays are of different lengths.
        Raises ZeroDivisionError if no line can be computed (fewer than two distinct X values).
    """

    if  not ax :
        ax  = list(range(len(ay)))
    elif (not isinstance(ax, (list, tuple))) :
        ax  = [ x + ax for x in range(len(ay)) ]

    if len(ax) != len(ay) :
        raise ValueError('Different length of ax and ay array')

    ln      = len(ax)

    sx      = sy    =   sxx =   sxy =   0.0         # floats, so the divisions, below, are float divisions

    for i   in range(ln) :
        x   = ax[i]
        y   = ay[i]

        sx  = sx + x
        sy  = sy + y

        sxx = sxx + (x * x)
        sxy = sxy + (x * y)

    d       =  (sxx * ln) - (sx * sx)

    m       = ((sxy * ln) - (sy * sx )) / d
    a       = ((sxx * sy) - (sx * sxy)) / d

    return( ( m, a ) )





def y_from_distance_direction(dist, direction) :
    """
        Get the Y value from a distance and a direction.
        The direction is an angle in radians, as math.atan2(y, x) gives it (0 direction is y=0:x>=0).
    """

    return(dist * math.sin(direction))


def x_from_distance_direction(dist, direction) :
    """
        Get the X value from a distance and a direction.
        The direction is an angle in radians, as math.atan2(y, x) gives it (0 direction is y=0:x>=0).
    """

    cosine  = math.cos(direction)

    return(dist * cosine)


def distance_from_x_y(x, y) :
    """
        Return the distance of the point, X/Y, from the origin.
        It's just math.hypot() - the routine is here as a reminder of that.
    """

    dist    = math.hypot(y, x)

    return(dist)


def direction_from_x_y(x, y) :
    """
        Return the direction, in radians, of the point, X/Y, from the origin.
        It's just math.atan2(y, x) - the routine is here as a reminder of the argument order.
    """

    direction   = math.atan2(y, x)

    return(direction)





def _read_whole_file(fname, how = "t") :
    """
        Read a whole file and return its contents.

        'how' is added to the "r" of the open mode ("t" for text, "b" for binary).
        The file is closed even if the read fails.
    """

    fi  = open(fname, "r" + how)
    try :
        return(fi.read())
    finally :
        fi.close()
    pass


def read_whole_text_file(fname) :
    """ Return the whole contents of a file that is opened in text mode. """

    contents    = _read_whole_file(fname, "t")

    return(contents)


def read_whole_binary_file(fname) :
    """ Return the whole contents of a file that is opened in binary mode. """

    contents    = _read_whole_file(fname, "b")

    return(contents)





def _write_whole_file(fname, how, s) :
    """
        Write a whole file.

        'how' is added to the "w" of the open mode ("t" for text, "b" for binary).
        A Unicode string is converted with unicode_byte_string() before it is written.
        The file is closed even if the write fails.
    """

    if  isinstance(s, UnicodeType) :
        s   = unicode_byte_string(s)

    fi  = open(fname, "w" + how)
    try :
        fi.write(s)
    finally :
        fi.close()
    pass



def write_whole_text_file(fname, s) :
    """ Write a string as the whole contents of a text file. A Unicode string is written as UTF-8. """

    text    = s
    if  isinstance(text, UnicodeType) :
        text    = text.encode('utf-8')

    return(_write_whole_file(fname, "t", text))


def write_whole_binary_file(fname, s) :
    """ Write a string as the whole contents of a file that is opened in binary mode. """

    result  = _write_whole_file(fname, "b", s)

    return(result)



def elapsed_time() :
    """ Return the value of a running clock that counts from some arbitrary "zero". """

    on_windows  = (sys.platform == 'win32')

    if  not on_windows :
        #   Mac could use MacOS.GetTicks() / 60.0, I suppose.
        return(os.times()[4])       # under windows this value stays at zero

    return(time.clock())            # as an alternate: win32api.GetTickCount()




def get_ini_or_cfg_file_name(base_name, app_name = None) :
    """
        Return a file name of config_directory/base_name.ini or .cfg, appropriate to the OS.

        Under Windows the name is base_name.ini in the 'app_name' directory under the
        appdata directory. That directory is created if it does not exist.
        Elsewhere the name is .base_name.cfg in the user's home directory.

        An empty 'base_name' means "tzlibpy". An empty 'app_name' means 'base_name'.
    """

    base_name       = base_name or "tzlibpy"

    if  sys.platform == 'win32' :
        app_name    = app_name or base_name

        app_path    = os.path.normpath(os.path.expandvars("${appdata}" + "/" + app_name))
        if  not os.path.isdir(app_path) :
            os.makedirs(app_path)
        return(os.path.join(app_path, base_name + ".ini"))     # join does the separator, so the slashes aren't mixed

    return(os.path.expanduser(       "~/." + base_name + ".cfg"))



def temp_file_name(base_name = None, app_name = None, ext = None) :
    """
        Return a file name, with a random number in it, that is not the name of an existing file.

        If both 'base_name' and 'app_name' are given, the name is modeled on what
        get_ini_or_cfg_file_name() returns for them.
        Otherwise it is tmp_tzlibpy... in the TEMP or TMP directory (or the current directory).
        'ext' is the name's extension. It defaults to ".tmp".
    """

    ext = ext or ".tmp"

    if  app_name and base_name :
        fn  = get_ini_or_cfg_file_name(base_name, app_name)
    else :
        td  = os.environ.get("TEMP", None) or os.environ.get("TMP", None) or "."
        fn  = os.path.join(td, "tmp_tzlibpy" + ext)

    while True :
        rfn = os.path.splitext(fn)[0] + ("_%08x" % ( random.randint(0, 2000000000) )) + ext
        if  not os.path.isfile(rfn) :
            return(rfn)
        pass
    pass




def s_except_1(v) :
    """ Return the plural "s" unless 'v' - or the length of 'v', if it's a tuple, list or dictionary - is 1. """

    count   = v
    if  isinstance(count, (TupleType, ListType, DictionaryType ) ) :
        count   = len(count)

    return(( "", "s" )[count != 1])





def same_object(o1, o2) :
    """ Return whether the two arguments are one and the same object (in memory) - not just equal objects. """

    return(o1 is o2)



def sys_err_file_line(sys_exc_info_2 = None) :
    """
        Return a one-line string telling where the latest exception happened.

        sys_exc_info_2      A traceback object. A false value means the traceback of the
                            exception currently being handled (sys.exc_info()[2]).

        Only the innermost traceback entry is used. Its line breaks are replaced
        by " ---- " so the result fits on one line.
    """

    if  not sys_exc_info_2 :
        sys_exc_info_2  = sys.exc_info()[2]

    entries     = traceback.format_tb(sys_exc_info_2)
    innermost   = entries[-1].strip()

    return(re.sub(r"[\r\n]+", " ---- ", innermost))



#
#
#       Test
#
#
if __name__ == '__main__' :

    ctm = elapsed_time()
    print "%20.10f" % ( ctm )

    a = [ 1, 2, 3 ]

    d = make_dictionary(a)

    print d[1], d[2], d[3]
    d = make_dictionary(a, "17")

    b = d.keys()
    a.sort()
    b.sort()
    for i in range(0, max(len(a), len(b))) :
        if  a[i] != b[i] :
            print "a != b", i
        pass

    crc = blkcrc32(_TEST_CRC_VALUE, "now is the time")
    crc = long(crc) & 0xFFFFffffL
    print "crc=0x%08lx %lu" % ( crc, crc )
    if  crc != 0xa458b82eL :
        s    = "crc [%s] is wrong!" % ( str(crc) )
        raise(s)
    crc = blkcrc32(0x98765431L, "now is the time")
    crc = crc & 0xFFFFffffL
    print "crc=0x%08lx %lu" % ( crc, crc )
    if  crc != 0xb525d257L :
        s    = "crc [%s] is wrong!" % ( str(crc) )
        raise(s)

    crc = 0
    bts = 0
    for i in range(1000) :
        s   = "%u %f" % ( i, random.random() )
        pc  = pure_python_crc32(crc, s)
        bts |= crc
        nc  = blkcrc32(crc,          s)
        if  pc != nc :
            s   = "crc mismatch 'tween pure and zlib (%s (%08lx:%u) %08lx:%u != %08lx:%u)!" % ( s, crc, crc, pc, pc, nc, nc )
            raise(s)
        if  random.random() >= 0.5 :
            crc = pc
        else :
            crc = nc

        if  (i > 100) and ((bts & 0xFFFFffffL) == 0xFFFFffffL) :
            break
        pass

    print "bcrc = %08x" % ( crc )


    if  array_find( [ 1, 2, 3 ], 2) != 1 :
        s = "1st array_find != 1"
        raise(s)
    if  array_find( [ 1, 2, 3 ], [ 2, 1 ] ) != 1 :
        s = "2nd array_find != 1"
        raise(s)
    if  array_find( [ 1, 2, 3 ], [ 1, 2 ] ) != 0 :
        s = "array_find != 0"
        raise(s)
    if  array_find( [ 1, 2, 3 ], [ 4, 5, 6, 7 ] ) >= 0 :
        s = "array_find >= 0"
        raise(s)


    if  strrev("x") != "x" :
        s = "strrev of 'x' is " + strrev("x")
        raise(s)

    if  strrev("xy") != "yx" :
        s = "strrev of 'xy' is " + strrev("xy")
        raise(s)

    if  strrev("xyz") != "zyx" :
        s = "strrev of 'xyz' is ", strrev("xyz")
        raise(s)


    s   = file_name_able("abc:^%&\"\';?*blah")
    if  s != "abc__%__;__blah" :
        raise   "file_name_able not right!", s



    print "Dir of *.py files in this directory:"
    files = ambiguous_file_list("*.py")
    print files

    print "Dir of *.py files in this directory and sub-dirs:"
    files = ambiguous_file_list("*.py", True)
    print files

    if  tz_vector_cosine( [ 1, 2, 3 ], [ 4, -5, 6 ]) != 0.365486942323903610 :
        s = "Cosine problem %2.50f" % ( tz_vector_cosine( [ 1, 2, 3 ], [ 4, -5, 6 ]) )
        raise(s)


    s   = " ".join(string_pairs( [ "a", "bb", "c", "d" ], 1, "x"))
    if  s != "axbb bbxc cxd" :
        s = "string_pairs problem: " + s
        raise(s)


    s   = " ".join(flat_positional_strings( [ "a", "bb", "c", "d" ], 3, "_%u_%s_"))
    if  s != "_0_a_ _1_bb_ _1_c_ _2_d_ _3_a_ _3_bb_ _4_c_ _4_d_" :
        s = "flat_positional_strings problem: " + s
        raise(s)


    s   = " ".join(log_positional_strings( [ "a", "bb", "c", "d", "e", "f", "g", "h", "i", "j" ], 2.0, "_%u_%s_"))
    if  s != "_0_a_ _1_bb_ _2_c_ _2_d_ _3_e_ _3_f_ _3_g_ _4_h_ _4_i_ _4_j_" :
        s = "log_positional_strings problem: " + s
        raise(s)


    s   = " ".join(log_positional_strings( [ "a", "bb", "c", "d", "e", "f", "g", "h", "i", "j" ], 3.0, "_%u_%s_"))
    if  s != "_0_a_ _2_bb_ _3_c_ _4_d_ _4_e_ _5_f_ _5_g_ _6_h_ _6_i_ _6_j_" :
        s = "log_positional_strings problem: " + s
        raise(s)


    ( m, a )    = linear_regression( ( -2, -1, 0, 1, 2, 3 ), ( -1, 0, 1, 2, 3, 4 ) )
    if  (m != 1.0) or (a != 1.0) :
        s   = "Line: m=" + str(m) + " a=" + str(a)
        raise   s


    ( m, a )    = linear_regression( None, ( -1, 1, 3, 5, 7 ) )
    if  (m != 2.0) or (a != -1.0) :
        s   = "NoneX: m=" + str(m) + " a=" + str(a)
        raise   s


    ( m, a )    = linear_regression( 1, ( -1, 1, 3, 5, 7 ) )
    if  (m != 2.0) or (a != -3.0) :
        s   = "OneX: m=" + str(m) + " a=" + str(a)
        raise   s


    ( m, a )    = linear_regression( ( -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6 ), ( -4, -5, -2, -3, 0, -1, 1, 3, 2, 5, 4, 7, 6 ) )
    if  (int(round(m * 1000.0)) != 967) or (a != 1.0) :
        s   = "Jag: m=" + str(m) + " a=" + str(a)
        raise   s


    s   = decode_html_entities("hearts:&hearts; sigma:&sigma; oacute=&oacute; gt=&gt; lt=&#60;")
    if  s != "hearts:&hearts; sigma:&sigma; oacute=\xf3 gt=> lt=<" :
        s = "decode_html_entities problem: " + s
        raise(s)


    s   = safe_html("\x00&<><>&~\x7f\x80\xff#\r\n-----\r\n------\r\n-------\r\n-\r-\n-\r\n")
    if  s != "&#0;&amp;&lt;&gt;&lt;&gt;&amp;~&#127;&#128;&#255;#<BR>-----<BR><HR><BR><HR><BR>-<BR>-<BR>-<BR>" :
        s = "safe_html problem: " + s
        raise(s)


    s   = printable(" !@#$%^&*()_-|\\{}[];:'\x22<>,.?/`~abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890  \x00\x01\x02\x1f\x7f\x80\xff z", "~+~")
    if  s !=        " !@#$%^&*()_-|\\{}[];:'\x22<>,.?/`~abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890  ~+~~+~~+~~+~~+~~+~~+~ z" :
        s =  "printable problem: " + s
        raise(s)

    s   = printable(" !@#$%^&*()_-|\\{}[];:'\x22<>,.?/`~abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890  \x00\x01\x02\x1f\x7f\x80\xff z")
    if  s !=        " !@#$%^&*()_-|\\{}[];:'\x22<>,.?/`~abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890  _______ z" :
        s =  "printable problem: " + s
        raise(s)


    s       =   c_string("[\\ \' \" \a \b \f \n \r \t \v \\]")
    if  s  !=   r"[\\ \' \" \a \b \f \n \r \t \v \\]" :
        s   =   "c_string: " + s
        raise(s)


    s       =   lf_only("\r\r\r\n\n\tnow\t\r\nis\r the \n\r\ntime  \r\r\nfor")
    if  s  !=   "\n\n\tnow\t\nis\n the \n\ntime  \nfor" :
        s   =   "lf_only: " + c_string(s)
        raise(s)

    s       =   lf_only_with_no_trailing_white_space("   \n   blah   \n lkjsd    \f   \t   \n lkjsdf  \t")
    if  s  !=   "\n   blah\n lkjsd\n lkjsdf\n" :
        s   =   "lf_only_with_no_trailing_white_space: " + c_string(s)
        raise(s)

    s       =   lf_only_with_no_trailing_white_space("   \n   blah   \n lkjsd    \f   \t   \n lkjsdf  \t\n")
    if  s  !=   "\n   blah\n lkjsd\n lkjsdf\n" :
        s   =   "lf_only_with_no_trailing_white_space: " + c_string(s)
        raise(s)

    s       =   no_blank_lines("\r\r\r\n\n\tnow\t\r\nis\r the \n\r\ntime  \r\r\nfor")
    if  s  !=   "\tnow\nis\n the\ntime\nfor\n" :
        s   =   "no_blank_lines: " + c_string(s)
        raise(s)


    s       =   multiline_strip("  x \r\n\t \tx \r  \t x\nbla   \t   x \n test \n")
    if  s  !=   "x\nx\nx\nbla   \t   x\ntest\n" :
        s   =   "multiline_strip: " + c_string(s)
        raise(s)

    s       =   multiline_strip("x \r\n\t \tx \r  \t x\nbla   \t   x \n test ")
    if  s  !=   "x\nx\nx\nbla   \t   x\ntest" :
        s   =   "multiline_strip: " + c_string(s)
        raise(s)

    s       =   multiline_strip("\r  x \r\n\t \tx \r  \t x\nbla   \t   x \n test ")
    if  s  !=   "\nx\nx\nx\nbla   \t   x\ntest" :
        s   =   "multiline_strip: " + c_string(s)
        raise(s)

    s       =   multiline_strip("  \r  x \r\n\t \tx \r  \t x\nbla   \t   x \n test ")
    if  s  !=   "\nx\nx\nx\nbla   \t   x\ntest" :
        s   =   "multiline_strip: " + c_string(s)
        raise(s)

    s       =   multiline_strip("  \r  xZ \r\n\tZ \tx \r  \t x\nbla   \t   x \n test ", " \tZ")
    if  s  !=   "\nx\nx\nx\nbla   \t   x\ntest" :
        s   =   "multiline_strip: " + c_string(s)
        raise(s)


    s       =   " now is the time "
    cs      =   maybe_wrap_with_cdata(s)
    if  s  != cs :
        raise   "gratuitous cdata"

    s       =   " now is the <time "
    cs      =   maybe_wrap_with_cdata(s)
    if  s  == cs :
        raise   "Missed cdata <"

    s       =   " now is the &time "
    cs      =   maybe_wrap_with_cdata(s)
    if  s  == cs :
        raise   "Missed cdata &"

    s       =   " now is the \ntime "
    cs      =   maybe_wrap_with_cdata(s)
    if  s  == cs :
        raise   "Missed cdata \\n"

    s       =   " now is the \rtime "
    cs      =   maybe_wrap_with_cdata(s)
    if  s  == cs :
        raise   "Missed cdata \\r"

    s       =   "<![CDATA[ now is the &time ]]>"
    cs      =   maybe_wrap_with_cdata(s)
    if  s  != cs :
        raise   "Double cdata"

    s       =   " <![CDATA[ now is the &time ]]>"
    cs      =   maybe_wrap_with_cdata(s)
    if  s  == cs :
        raise   "Missed imperfect cdata"
    if  cs !=   "<![CDATA[ <![CDATA[ now is the &time &#93;&#93;&gt;]]>" :
        raise   "Missed included cdata data [%s]" % ( cs )

    if  s_except_1(1) == 's' :
        raise   "s_except_1 for 1"
    if  s_except_1(1.0) == 's' :
        raise   "s_except_1 for 1"
    if  s_except_1(0) != 's' :
        raise   "s_except_1 for 0"
    if  s_except_1(2) != 's' :
        raise   "s_except_1 for 2"
    if  s_except_1(-1) != 's' :
        raise   "s_except_1 for -1"


    r   = binary_search([ 3, 6, 9, 12, 12, 13 ], 12)
    if  r != 3 :
        raise   "binary_search(12) not 3", r

    r   = binary_search([ 3, 6, 9, 10, 12, 13, 14 ], 10)
    if  r != 3 :
        raise   "binary_search(10) not 3", r

    r   = binary_search([ 3, 6, 9, 10, 12, 13, 14 ], 11)
    if  r != 4 :
        raise   "binary_search(11) not 4", r

    r   = binary_search([ 3, 6, 9, 10, 12, 13, 14 ], 110)
    if  r != 7 :
        raise   "binary_search(110) not 7", r

    r   = binary_search([ 3, 6, 9, 10, 12, 13, 14 ], -1)
    if  r != 0 :
        raise   "binary_search(-1) not 0", r

    # print "tfn", temp_file_name()
    # print "tfn", temp_file_name()


    s   = de_html_str("<HTML> <BODY>\n\r\n\n\r\r\nText  on  a\tline<BR>after <script>scripting</script>break</P attrib='lksjdflkjsldkjflsjdlfkjsldjfljsdf'> after paragraph <HR><script >and more scripting</script ><HR> \xd0 &amp; &lt;DIR&gt; after angled DIR <DIR>after DIR</LI>After end li")
    cs  = "\nText on a line\nafter break\n\nafter paragraph\n----------------------------------------\n\n----------------------------------------\n\xd0 & <DIR> after angled DIR\nafter DIR\nAfter end li"
    if  s != cs :

        print len(s), len(cs)

        def cstr(c) :
            if  (ord(c) >= 32) and (ord(c) < 0x7f) :
                return(c)
            return("|" + str(ord(c)) + "|")

        for i in xrange(len(s)) :
            if  s[i] != cs[i] :

                print i, cstr(s[i]), cstr(cs[i])
                break
            pass

        s = "de_html_str problem: [" + "".join([ cstr(s[i]) for i in xrange(len(s)) ]) + "]"
        raise(s)


    t   = elapsed_time()
    print "%20.10f %20.10f" % ( t, t - ctm )

    time.sleep(0.2)

    t   = elapsed_time()
    print "%20.10f %20.10f" % ( t, t - ctm )

    pass


##      Public things.
#
#       NOTE(review): Python itself only honors the lower-case name "__all__", so this
#       upper-case list does not limit what "from tzlib import *" brings in. It is left
#       under its existing name because renaming it would change what a star-import exposes.
#
__ALL__ = [
            'ambiguous_file_list',
            'find_in_array',
            'array_find',
            'make_dictionary',
            'without_dupes',

            'keys_sorted_by_keys_values',
            'keys_sorted_by_values_keys',
            'values_sorted_by_keys_values',
            'values_sorted_by_values_keys',

            'ascii',                    # the comma was missing, which fused this name with the next one

            'file_name_able',

            'lf_only',
            'no_blank_lines',

            'crc32',
            'blkcrc32',

            'tz_vector_cosine',

            'decode_html_entities',
            'safe_html',

            'linear_regression',
          ]


#
#
#
# eof
