#!/usr/bin/python

# mailheader_dnsbl.py
#       --copyright--                   Copyright 2007 (C) Tranzoa, Co. All rights reserved.    Warranty: You're free and on your own here. This code is not necessarily up-to-date or of public quality.
#       --url--                         http://www.tranzoa.net/tzpython/
#       --email--                       pycode is the name to send to. tranzoa.com is the place to send to.
#       --bodstamps--
#       October 27, 2006        bar
#       October 28, 2006        bar     DETECT_HEADER
#       October 31, 2006        bar     always eliminate our header in any case
#       November 18, 2007       bar     turn on doxygen
#       November 21, 2007       bar     allow import
#       November 27, 2007       bar     insert boilerplate copyright
#       May 17, 2008            bar     email adr
#       July 25, 2008           bar     cache
#       July 26, 2008           bar     get rid of old OUR_HEADER's
#       July 28, 2008           bar     occasionally strip old stuff just for kicks
#       November 29, 2011       bar     pyflake cleanup
#       May 27, 2012            bar     doxygen namespace
#       October 9, 2019         bar     in any case, whack the tmp cache file
#       --eodstamps--
##      \file
#       \namespace              tzpython.mailheader_dnsbl
#
#
#       Put an X-tz-dnsbl-found: header on email whose RBL hit count reaches the given cutoff.
#       The RBL lookups are done on the "X-From-IPadr" header's address.
#
#


import  cPickle
import  os
import  random
import  time

import  tzlib
import  replace_file
import  tzemail



OUR_HEADER      =   "X-tz-dnsbl-found: "        # header added when the hit count is at or above the cutoff
DETECT_HEADER   =   "X-tz-dnsbl-learn: "        # header added when there are hits, but fewer than the cutoff


DATA_TIMEOUT    =   3600.0 * 5                  # default lifetime of a cached lookup result, in seconds (5 hours)


def read_cache(cache_file_name, ip, data_timeout = DATA_TIMEOUT) :
    """
        Load the pickled lookup cache and fetch the still-fresh result for 'ip'.

        The cache is a dictionary keyed by ip whose values are [ result, time_stored ] pairs.

        \param  cache_file_name     Name of the cache file. If empty/None, no file is read.
        \param  ip                  The key to look up in the cache.
        \param  data_timeout        Seconds an entry stays valid. A false value (None, 0) means DATA_TIMEOUT.

        \returns    ( cache_dictionary, result ).
                    'result' is the cached result for 'ip', or [] if there is none or it has timed out.
                    A timed out entry for 'ip' is deleted from the returned dictionary (the file is not touched).
                    If the file is missing, unreadable or malformed, ( {}, [] ) is returned.

        SECURITY NOTE:  Unpickling can execute arbitrary code. The cache file must be writable
                        only by trusted users.
    """

    data_timeout    = data_timeout or DATA_TIMEOUT

    if  not cache_file_name :
        return( ( {}, [] ) )

    bha = []
    try :
        with open(cache_file_name, "rb") as fi :                # the 'with' closes the file whether or not the load works
            ca  = cPickle.load(fi)

        car = ca.get(ip, None)
        if  car :
            if  time.time() - car[1] > data_timeout :
                del(ca[ip])                                     # delete the timed out entry from the cache
            else :
                bha = car[0]
            pass
        pass
    except Exception :                                          # best effort: any bad cache is an empty cache - but let KeyboardInterrupt and SystemExit through
        return( ( {}, [] ) )

    return( ( ca or {}, bha ) )




def _purge_expired_cache_entries(ca, data_timeout) :
    """
        Delete every entry of the cache dictionary that is older than 'data_timeout' seconds.

        \returns    True if anything was deleted.
    """
    now         = time.time()
    expired     = [ k for k in ca if now - ca[k][1] > data_timeout ]
    for k in expired :
        del(ca[k])
    return(bool(expired))


def _write_cache(cache_file_name, ca) :
    """
        Best-effort write of the cache dictionary to the cache file.

        The pickle is written to a per-process temp file, which then replaces the cache file
        (leaving a ".bak"). Pickling, IOError and OSError failures are ignored.
        In any case, the temp file is whacked.
    """
    try :
        ps      = cPickle.dumps(ca, cPickle.HIGHEST_PROTOCOL)
    except cPickle.PicklingError :
        return

    tfn         = cache_file_name + ".tmp_" + str(os.getpid())
    try :
        with open(tfn, "wb") as fo :                            # closed even if the write fails
            fo.write(ps)
        replace_file.replace_file(cache_file_name, tfn, cache_file_name + ".bak")
    except ( IOError, OSError ) :
        pass
    finally :
        tzlib.whack_file(tfn)                                   # in any case, whack the tmp cache file


def fixup_email_header(email, cutoff = 2, data_timeout = DATA_TIMEOUT, cache_file_name = None) :
    """
        Put our header in an email depending upon what the RBL situation is.

        Any existing OUR_HEADER / DETECT_HEADER headers are always removed first.
        Then, if the email has exactly one "X-From-IPadr" header, its value is looked up
        (in the cache, if there is one, else with tzemail.dnsbl_count()) and:

            OUR_HEADER      is added if the hit count is at or above 'cutoff'
            DETECT_HEADER   is added if there are hits, but fewer than 'cutoff'

        The header value is the space-joined third element of the lookup result
        (presumably the names of the lists that hit - confirm against tzemail.dnsbl_count()).

        \param  email               The email, in the form the tzemail header routines take.
        \param  cutoff              Hit count at which OUR_HEADER is used. Also passed to the lookup as its stop_on_count.
        \param  data_timeout        Seconds a cached result stays valid. A false value (None, 0) means DATA_TIMEOUT.
        \param  cache_file_name     Name of the pickled cache file, or None for no caching.

        \returns    The 'email' that was passed in.
    """

    data_timeout    = data_timeout or DATA_TIMEOUT

    tzemail.eliminate_email_header(email, DETECT_HEADER)
    tzemail.eliminate_email_header(email, OUR_HEADER)

    hdrs    = tzemail.find_email_headers(email, "X-From-IPadr")
    if  len(hdrs) != 1 :
        return(email)                                           # no sender address, or an ambiguous one: there is nothing to look up

    ip          = hdrs[0]

    changed     = False
    ( ca, bha ) = read_cache(cache_file_name, ip, data_timeout)

    if  not bha :
        bha         = tzemail.dnsbl_count(ip, stop_on_count = cutoff)
        ( ca, bhb ) = read_cache(cache_file_name, ip, data_timeout)     # looking up the ip or name has taken some time, so another process may have updated the cache
        if  (not ca) or (not bhb) :
            ca[ip]  = [ bha, time.time() ]                              # add an entry to the cache
            changed = True                                              # and note the cache must be written
        pass

    if  bha[0]  >= cutoff :
        tzemail.add_email_header(email, OUR_HEADER    + " ".join(bha[2]))
    elif bha[0] >  0 :
        tzemail.add_email_header(email, DETECT_HEADER + " ".join(bha[2]))

    if  cache_file_name :
        #
        #   Strip timed out entries whenever the file is being rewritten anyway, and
        #   otherwise only occasionally, so a cache hit does not normally cause a write.
        #   (random.randint(1, 1000) by itself is never zero, so testing its truth always purged.)
        #
        if  changed or (random.randint(1, 1000) == 1) :
            if  _purge_expired_cache_entries(ca, data_timeout) :
                changed = True                                  # the cache is changed, so write it out
            pass

        if  changed :
            _write_cache(cache_file_name, ca)
        pass

    return(email)



if __name__ == '__main__' :
    #
    #   Command line:
    #
    #       [--cutoff | -c  count] [--data_timeout | -t  time] [--cache  file_name] [input_file [output_file]]
    #
    #   The email is read from input_file (default: stdin), its headers are fixed up,
    #   and it is written to output_file (default: stdout).
    #   If an option is given more than once, the last one wins.
    #
    import  sys

    import  tz_parse_time
    import  TZCommandLineAtFile


    del sys.argv[0]

    TZCommandLineAtFile.expand_at_sign_command_line_files(sys.argv)

    cutoff          = 2
    cache_file_name = None
    data_timeout    = None                                                          # None makes fixup_email_header() use its default

    while True :
        oi  = tzlib.array_find(sys.argv, [ "--cutoff", "-c" ] )
        if  oi < 0 :    break
        del sys.argv[oi]
        cutoff          = int(sys.argv.pop(oi))


    while True :
        oi  = tzlib.array_find(sys.argv, [ "--data_timeout", "-t" ] )
        if  oi < 0 :    break
        del sys.argv[oi]
        data_timeout    = tz_parse_time.parse_time_zone(sys.argv.pop(oi))           # in h:m:s h h:m form
        if  data_timeout is None :
            raise ValueError("I cannot understand the --data_timeout value!")       # a string cannot be raised - that's a TypeError that hides the message
        pass


    while True :
        oi  = tzlib.array_find(sys.argv, [ "--cache" ] )
        if  oi < 0 :    break
        del sys.argv[oi]
        cache_file_name = sys.argv.pop(oi)


    if  len(sys.argv) > 2 :
        raise ValueError("Just tell me an input and output file. Nothing more.")


    #
    #   Read the email from the input file or stdin
    #
    if  len(sys.argv) > 0 :
        fi  = open(sys.argv.pop(0), "r")
        try :
            email   = tzemail.read_email_from_file(fi)
        finally :
            fi.close()
        pass
    else :
        email       = tzemail.read_email_from_file(sys.stdin)


    fixup_email_header(email, cutoff, data_timeout = data_timeout, cache_file_name = cache_file_name)


    if  len(sys.argv) > 0 :
        fo  = open(sys.argv.pop(0), "w")
    else :
        fo  = sys.stdout

    #
    #   Write the email out to the output file or stdout
    #
    try :
        for li in email :
            print >> fo, li
        pass
    finally :
        if  fo != sys.stdout :
            fo.close()
        pass

    pass


#
#
#
# eof
