#!/usr/bin/python

# ssh_check_remote_data.py
#       --copyright--                   Copyright 2010 (C) Tranzoa, Co. All rights reserved.    Warranty: You're free and on your own here. This code is not necessarily up-to-date or of public quality.
#       --url--                         http://www.tranzoa.net/tzpython/
#       --email--                       pycode is the name to send to. tranzoa.com is the place to send to.
#       --bodstamps--
#       April 27, 2010          bar
#       November 29, 2011       bar     pyflake cleanup
#       --eodstamps--
##      \file
#
#
#       Check the actual disk data contents on a remote machine using ssh and md5sum.
#       Keep a local record file containing the latest md5sum output to check against.
#
#

import  subprocess
import  os
import  re
import  sys

import  replace_file
import  tzlib


#   Exit code used when the remote listing contains files that are not in the local record (and no error was found).
NEW_FILE_ERR_CODE   = 189
#   Exit code used when a recorded file has a different hash or is missing from the remote listing. It takes precedence over NEW_FILE_ERR_CODE.
ERROR_ERR_CODE      = 199

#   Usage text printed for --help. The single %s is filled in with the program's base name.
help_str    = """
%s (options) (user@)host:file_name_or_ambiguous_file_name (local_record_file)

Options:

    --id_file   file_name           Set the SSH identity file name.
    --port      port_number         Set the port number (default: 22)
    --fix_errors                    Write the state to the local record even if there are (199-ish) errors.
    --file                          Fetch the remote name with scp as a file of md5sum output,
                                    even if the name is empty or contains * or ? characters.

Run md5sums on an ambiguous file name on a remote machine using ssh.
Or get a file piped from md5sum's output from a remote machine using scp.
Keep a record of the md5's locally and return error codes depending upon changes that have happened to the files.

To detect whether a remote file archive has had a file content change,  run this script and sense error code 199.
To detect whether a remote file archive has had a new file added,       run this script and sense error code 189.

Return   0 if the files are exactly as expected in the record file.
Return 199 if any files have changed or gone missing.
Return 189 if there are new files (and the local record of them is now updated).
199 supersedes 189.
Other non-zero codes report usage problems or a failure to get md5's from the remote machine.

"""

#
#
#
#   shlex.quote where it exists, pipes.quote on interpreters whose shlex lacks it.
try :
    from shlex import quote as shell_quote
except ImportError :
    from pipes import quote as shell_quote


#   One line of md5sum output: 32 hex digits, one blank, then the rest of the line (trailing white space dropped).
#   md5sum's second separator character (a blank or '*') stays at the front of the captured name.
MD5_LINE_RE = re.compile(r"^([0-9a-f]{32}) (.*?)\s*$", re.MULTILINE | re.DOTALL)


def parse_md5_lines(text) :
    """Return the list of ( md5, name ) pairs for every md5sum-style line in text. Other lines are ignored."""
    return MD5_LINE_RE.findall(text)


def format_md5_record(pairs) :
    """Return the record file text for ( md5, name ) pairs: one "md5 name" line per pair, newline terminated."""
    return "\n".join([ md5 + " " + name for ( md5, name ) in pairs ]) + "\n"


def compare_md5s(remote, recorded) :
    """
        Compare the remote ( md5, name ) pairs against the recorded ones.

        Return ( new_files, mistakes ):
            new_files   ( md5, name ) pairs found remotely but not in the record, in remote order.
            mistakes    Message strings for changed hashes (in remote order) followed by
                        messages for recorded names that are gone (sorted by name).

        A name is matched against the record once. If the remote list repeats it, the repeat counts as new.
        If the record repeats a name, the last recorded hash is the one compared.
    """
    expected    = {}
    for ( md5, name ) in recorded :
        expected[name]  = md5

    new_files   = []
    mistakes    = []
    for ( md5, name ) in remote :
        if  name not in expected :
            new_files.append(( md5, name ))
            continue
        if  expected[name] != md5 :
            mistakes.append("Wrong hash for %s: %s should be %s" % ( name, md5, expected[name] ) )
        del expected[name]

    #   Whatever is left in the record was not seen remotely. Sorted so the report is repeatable.
    for name in sorted(expected) :
        mistakes.append("File %s gone" % name)

    return ( new_files, mistakes )


def split_remote_location(loc) :
    """Split "(user@)host:path" at the first colon. Return ( host, path ) - path is "" if there is no colon."""
    ( host, _colon, path )  = loc.partition(":")
    return ( host, path )


def build_remote_find_command(remote_path) :
    """
        Return the command line the remote shell runs to md5sum everything matching remote_path.

        The directory part is where find starts and the last path component is find's -name pattern.
        Both are quoted for the remote shell, so blanks and quote characters survive.
        An empty directory part becomes "." so find always has somewhere to start.
    """
    remote_path = remote_path or "."
    start_dir   = os.path.dirname(remote_path) or "."
    pattern     = os.path.basename(remote_path)
    return "find %s -name %s -print0 | xargs -0 md5sum ; exit" % ( shell_quote(start_dir), shell_quote(pattern) )


def run_and_capture(args) :
    """
        Run the program given by the argument list (no local shell is involved).
        Return everything it wrote to stdout and stderr.
        If the program cannot be started, return a one line description of the problem instead.
    """
    try :
        proc    = subprocess.Popen(args, stdin = subprocess.PIPE, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, close_fds = True)
    except OSError as e :
        return "%s: %s\n" % ( args[0], str(e) )
    return proc.communicate()[0]


def pop_option(argv, names, wants_value = False) :
    """
        Remove every occurrence of the option (any of its spellings in names) from argv.

        Return ( seen, value ):
            seen    True if the option was given at least once.
            value   The argument after the last occurrence if wants_value, otherwise None.

        Exits with code 103 if wants_value and the option is the last thing on the command line.
    """
    seen        = False
    value       = None
    while True :
        oi      = tzlib.array_find(argv, names)
        if  oi < 0 :
            break
        del argv[oi]
        seen    = True
        if  wants_value :
            if  oi >= len(argv) :
                print("Option %s needs a value!" % names[0])
                sys.exit(103)
            value   = argv.pop(oi)
    return ( seen, value )


#
#   Command line entry point.
#
#   Every print() below takes exactly one argument, so the output is the same
#   whether print is a statement or a function.
#
if __name__ == '__main__' :

    import  TZCommandLineAtFile

    program_name    = sys.argv.pop(0)
    TZCommandLineAtFile.expand_at_sign_command_line_files(sys.argv)

    if  tzlib.array_find(sys.argv, [ "--help", "-h", "-?", "/?", ] ) >= 0 :
        print(help_str % ( os.path.basename(program_name), ))
        sys.exit(254)

    #   Options are pulled out in this fixed order, each one consuming its own value.
    id_file         = pop_option(sys.argv, [ "--id", "--identity", "--id_file", "--id-file", "--idfile", "-i", ], True)[1]
    port_text       = pop_option(sys.argv, [ "--port", "-p", ], True)[1]
    fix_errors      = pop_option(sys.argv, [ "--fix_errors", "--fix-errors", "--fixerrors", "-e", ])[0]
    do_file         = pop_option(sys.argv, [ "--file", "-f", ])[0]

    id_args         = []
    if  id_file is not None :
        id_args     = [ "-i", id_file ]

    ssh_port_args   = []
    scp_port_args   = []
    if  port_text is not None :
        try :
            port_text   = "%u" % int(port_text)
        except ValueError :
            print("Port number [%s] is not a number!" % port_text)
            sys.exit(103)
        ssh_port_args   = [ "-p", port_text ]
        scp_port_args   = [ "-P", port_text ]               # scp spells the port option with a capital P

    if  not sys.argv :
        print("I need to know the remote location to check!")
        sys.exit(101)

    loc         = sys.argv.pop(0)

    #   Default record file name is derived from the whole location string.
    locf        = tzlib.file_name_able(loc).replace(" ", "_").replace(".", "_").replace("@", "_") + "_dir_md5sum.txt"
    if  sys.argv :
        locf    = sys.argv.pop(0)
    tfn         = locf + ".tmp"

    if  sys.argv :
        print("Unknown command line args %s !" % str(sys.argv))
        sys.exit(102)

    locd        = ""
    if  os.path.exists(locf) :
        locd    = tzlib.read_whole_text_file(locf)

    ( host, remote_path )   = split_remote_location(loc)

    if  do_file or (remote_path and ("*" not in remote_path) and ("?" not in remote_path)) :

        #   The remote name is a file of md5sum output. Copy it here with scp.
        if  os.path.exists(tfn) :
            os.remove(tfn)

        #   Only blanks are escaped for the remote side. Other characters in the path are passed through unchanged.
        escaped = remote_path.replace(" ", "\\ ")

        args    = [ "scp" ] + scp_port_args + id_args + [ host + ":" + escaped, tfn ]
        print("cmd[%s]" % " ".join([ shell_quote(a) for a in args ]))

        print(run_and_capture(args))

        if  not os.path.exists(tfn) :
            print("Could not retrieve [%s]!" % escaped)
            sys.exit(109)

        rbuf    = tzlib.read_whole_text_file(tfn)

    else        :

        #   The remote name is a pattern. Have the remote machine run md5sum on whatever matches it.
        remote_path = remote_path or "."

        args    = [ "ssh", "-o", "ServerAliveInterval=5" ] + id_args + ssh_port_args + [ host, build_remote_find_command(remote_path) ]

        rbuf    = run_and_capture(args)

        if  not rbuf.strip() :
            print("No md5's calulated for [%s]!" % remote_path)
            sys.exit(112)

    md5s        = parse_md5_lines(rbuf)

    if  not md5s :
        print("No md5's on remote!")
        sys.exit(108)

    ( new_files, mistakes ) = compare_md5s(md5s, parse_md5_lines(locd))

    for ( md5, name ) in new_files :
        print("New file: %s %s" % ( md5, name ))

    if  mistakes :
        print("\n".join(mistakes) + "\n")

    #   Changed or missing files outrank new files.
    retval      = 0
    if  mistakes :
        retval  = ERROR_ERR_CODE
    elif new_files :
        retval  = NEW_FILE_ERR_CODE

    #   The record is only rewritten when nothing is wrong, or when told to accept the errors.
    if  (retval != ERROR_ERR_CODE) or fix_errors :
        tzlib.write_whole_text_file(tfn, format_md5_record(md5s))
        replace_file.replace_file(locf, tfn, locf + ".bak")

    sys.exit(retval)


#
#
#
# eof

