#!/usr/bin/python

#   sms_backup_media.py
#       --copyright--                   Copyright 2018 (C) Tranzoa, Co. All rights reserved.    Warranty: You're free and on your own here. This code is not necessarily up-to-date or of public quality.
#       --url--                         http://www.tranzoa.net/tzpython/
#       --email--                       pycode is the name to send to. tranzoa.com is the place to send to.
#       --bodstamps--
#       May 19, 2018            bar
#       --eodstamps--
##      \file
#       \namespace              tzpython.sms_backup_media
#
#
"""
%s  (options) xml_file_name

Extract all the media (images and videos and vcards and ?) from an Android SMS_Backup .xml file.

Options:

    --out_dir   directory_path      Specify what directory to put the files in. Default: current directory.

    --verbose                       Print more stuff. Repeat the option to raise the verbosity level.

"""

import  base64
import  glob
import  os
import  re
import  sys
import  time

import  tzlib
import  tz_jpg
import  tz_parse_time


#           Example of the kind of element these patterns are written for:
#           <part seq="0" ct="image/jpeg" name="null" chset="null" cd="null" fn="null" cid="&lt;image000000&gt;" cl="image000000.jpg" ctt_s="null" ctt_t="null" text="null" data="/9j/4Q....==" />
#
#           parts_re captures the attribute text of each <part ...> tag - everything between "<part " and the first ">".
#           part_re  is applied to that attribute text. Its groups are: 1 = ct, 2 = name, 3 = cl, 4 = data.
#                    It only matches when ct is directly followed by name and the text ends with white space and a "/".
parts_re    = re.compile(r'<part ([^>]*?)>')
part_re     = re.compile(r'ct="([^"]+)"\s+name="([^"]+)"\s+.*cl="([^"]+)".*data="([^"]+)"\s+/$')


def extract_media_files(fn, out_dir = None, verbose = 0) :
    """
        Extract media files from an Android SMS_Backup XML file
        named fn to an existing, given directory (default:
        current dir).

        Each <part ... /> element that part_re can pick apart is
        base64-decoded and written under the name in its cl=""
        attribute. Only the base name of cl is used, so a backup
        file cannot make us write outside of the output directory.

        If a file of that name already exists with identical
        contents it is left alone (but still reported in the
        return value). If it exists with different contents,
        "_00001", "_00002", ... is added to the name until a
        free or identical file is found.

        Newly written files get their modification time set from
        a date/time parsed out of the file name, if there is one,
        and then tz_jpg is given a chance to set it from EXIF info.

        verbose:    > 0 prints rejected file names,
                    > 1 prints each file written,
                    > 2 prints file time changes,
                    > 3 prints parts that could not be parsed.

        Return a list of the files extracted or throw exceptions
        in the usual cases (e.g. No input file).

    """
    rv      = []
    dest    = out_dir or os.getcwd()
    fd      = tzlib.read_whole_text_file(fn)
    for pt in parts_re.findall(fd) :
        g   = part_re.search(pt)
        if  not g :
            if  verbose > 3 :                                                   # this catches a number of things with no data="", for instance
                print('Bad part: <part %s...' % pt[:200])
            continue

        ct, name, cl, data  = g.groups()

        # The cl value comes from the backup file, so strip any directory (or drive) part from it before using it as a file name.
        bn  = os.path.basename(re.split(r'[\\/]', cl)[-1])
        if  (name not in [ cl, 'null', ]) or (bn in [ '', '.', '..', ]) :
            if  verbose :
                print('Bad file name="%s" cl="%s"' % ( name, cl, ))
            continue

        ofn = os.path.join(dest, bn)
        md  = base64.b64decode(data)

        if  (ct == 'image/x-ms-bmp') and (os.path.splitext(ofn)[1].lower() == '.bmp') and (md[:2] != b'BM') :
            # Claimed to be a .BMP, but does not look like one. If tz_jpg accepts the data as a JPEG, name the file .jpg instead.
            try     :
                tz_jpg.a_jpg(md)
                ofn = os.path.splitext(ofn)[0] + ".jpg"                         # fix some kind of weirdness with some images
            except ValueError :
                pass

        # Find a name to use: the first one that is either free or already holds exactly this data.
        f, e    = os.path.splitext(ofn)
        i       = 0
        wrote   = False
        while True  :
            ffn = ofn if not i else (f + ("_%05u" % i) + e)
            if  not os.path.isfile(ffn) :
                tzlib.write_whole_binary_file(ffn, md)
                if  verbose > 1 :
                    print("%s %s" % ( ct, ffn, ))
                wrote   = True
                break
            if  (os.path.getsize(ffn) == len(md)) and (md == tzlib.safe_read_whole_binary_file(ffn)) :
                break                                                           # same file is already there - don't write it or touch its date/time
            i  += 1

        rv.append(ffn)

        if  not wrote :
            continue

        # Try to set the date/time for the file to that of the file's name.
        dfn = re.sub(r'(?i)^(img|images?)[_\-]*', "", os.path.basename(ffn))
        d   = tz_parse_time.parse_image_file_date_time(dfn)
        if  d and (1830 <= d.year < 2050) :                                     # well, the OS won't like dates before 1970, but that's its problem
            try     :
                t   = os.path.getmtime(ffn)
                nt  = time.mktime(d.timetuple())
                os.utime(ffn, ( os.path.getatime(ffn), nt ))
                if  verbose > 2 :
                    print(" ".join([ "utime ", ffn, time.ctime(t), time.ctime(os.path.getctime(ffn)), time.ctime(os.path.getmtime(ffn)), time.ctime(os.path.getatime(ffn)), time.ctime(nt), ]))
            except ( ValueError, OverflowError, OSError ) :                     # mktime and utime can refuse out-of-range dates. Setting the time is best-effort, so the file keeps its write time.
                pass

        # In practice, I've found no files with EXIF info, so this doesn't do anything.
        try     :
            t   = os.path.getmtime(ffn)
            nt  = tz_jpg.set_file_date_time_from_exif(ffn)                      # let's let tz_jpg set the date/time from the EXIF values, if there are some
            if  (verbose > 2) and (nt >= 0) and (nt != t) :
                print(" ".join([ "tz_jpg", ffn, time.ctime(t), time.ctime(os.path.getctime(ffn)), time.ctime(os.path.getmtime(ffn)), time.ctime(os.path.getatime(ffn)), time.ctime(nt), ]))
        except ValueError :
            pass

    return(rv)



def oops(err_msg) :
    """
        Write err_msg and a newline to stderr and
        end the program with an exit status of 1.
    """
    line    = err_msg + "\n"
    sys.stderr.write(line)
    raise SystemExit(1)


if  __name__ == '__main__' :
    # Command line driver: expand @file arguments, pick off the options, then extract the media from every named .xml file.
    import  TZCommandLineAtFile

    program_name    = sys.argv.pop(0)
    TZCommandLineAtFile.expand_at_sign_command_line_files(sys.argv, encoding = 'utf8')
    sys.argv.insert(0, program_name)

    out_dir         = None          # where to put the output files.
    verbose         = 0             # how printy we are

    argv            = sys.argv[1:]

    # Show the usage text when asked to, or when there is nothing at all on the command line.
    if  (not len(argv)) or (tzlib.array_find(argv, [ "--help", "-h", "-?", "/?", "/h", "/H", ] ) >= 0) :
        print(__doc__ % ( os.path.basename(program_name), ))
        sys.exit(0)                     # well, there goes my 254 :)

    # Each --verbose / -v on the command line bumps the verbosity level by one.
    while True      :
        oi          = tzlib.array_find(argv, [ "--verbose", "-v", ])
        if  oi < 0  : break
        del(argv[oi])
        verbose    += 1

    while len(argv) :
        a   = argv.pop(0)

        # NOTE(review): assumes find_argi() returns the index of a in the option list (or < 0) - confirm against tzlib.
        if  tzlib.find_argi(a, [ "--out_dir", "--output", "--out", "-o", ]) >= 0 :
            if  not len(argv) :
                oops("%s needs a directory name after it!" % a)
            out_dir = tzlib.expand_user_vars(argv.pop(0))       # the directory is the argument after the option, not the option itself
            continue

        # Anything else is an input file name, which may have wildcards in it.
        fn      = tzlib.expand_user_vars(a)
        fns     = glob.glob(fn)
        if  not len(fns) :
            oops("I cannot find a file named %s!" % a)

        out_dir = out_dir or os.getcwd()
        tzlib.safe_makedirs(out_dir)

        for fn in tzlib.make_dictionary([ os.path.abspath(fnn) for fnn in fns ]).keys() :
            mfns    = extract_media_files(fn, out_dir, verbose = verbose)
            print("  " + "\n  ".join(mfns))

#
# eof
