#!/usr/bin/python

# mailbox_dnsbl.py
#       --copyright--                   Copyright 2007 (C) Tranzoa, Co. All rights reserved.    Warranty: You're free and on your own here. This code is not necessarily up-to-date or of public quality.
#       --url--                         http://www.tranzoa.net/tzpython/
#       --email--                       pycode is the name to send to. tranzoa.com is the place to send to.
#       --bodstamps--
#       October 27, 2006        bar
#       November 18, 2007       bar     turn on doxygen
#       November 27, 2007       bar     insert boilerplate copyright
#       May 17, 2008            bar     email adr
#       November 29, 2011       bar     pyflake cleanup
#       May 27, 2012            bar     doxygen namespace
#       --eodstamps--
##      \file
#       \namespace              tzpython.mailbox_dnsbl
#
#
#       Scan mail box files, listing emails whose X-From-IPadr address is listed in black hole spammer list(s).
#
#

import  re
from    types   import  ListType, TupleType

import  tzlib
import  tzemail



# Pattern for four dot-separated runs of decimal digits (a dotted-quad IPv4 address) followed by end-of-string.
# The digit runs are captured together as group 1 and are not range-checked.
# Nothing else in this file refers to it.
ip_re   = re.compile(r"(\d+\.\d+\.\d+\.\d+)$")


def find_black_holes(files, stop_on_count = 0, since = 0, show_info = False) :
    """
        Scan mbox file(s) and return the X-From-IPadr values that are black hole listed.

        files           A file name, or a list/tuple of file names. Each is read with tzemail.read_mbox_file().
        stop_on_count   Handed to tzemail.dnsbl_count() as its stop_on_count argument. None is treated as 0.
        since           An email is skipped if its first line matches tzemail.from_re and tzemail.from_time()
                        of that line is non-negative and less than this value. None is treated as 0.
        show_info       If true, print "bha" and the lookup result for each listed address,
                        and "OK" and the address for each unlisted one.

        Only emails with exactly one X-From-IPadr header are considered.

        Each distinct header value is looked up once per call, whether or not it turns out to be listed.

        Returns a dictionary keyed by header value, holding element [2] of the tzemail.dnsbl_count() result
        for every value whose result element [0] is above zero.
        (Presumably element [2] is the list of black hole servers that listed the address -
        unique_black_hole_list_servers() concatenates these values. Confirm against tzemail.)
    """

    if  not isinstance(files, (list, tuple)) :
        files       = [ files ]

    since           = since         or 0
    stop_on_count   = stop_on_count or 0

    ips             = {}
    checked         = set()         # every address already looked up, listed or not, so none is queried twice

    for fn in files :
        for email in tzemail.read_mbox_file(fn) :
            when    = -1            # stays negative (always scanned) if the first line is not a recognized From line
            if  tzemail.from_re.match(email[0]) :
                when    = tzemail.from_time(email[0])

            if  (when >= 0) and (when < since) :
                continue            # known to be older than the cutoff

            hdrs    = tzemail.find_email_headers(email, "X-From-IPadr")
            if  len(hdrs) != 1 :
                continue

            ip      = hdrs[0]
            if  ip in checked :
                continue
            checked.add(ip)

            bha     = tzemail.dnsbl_count(ip, stop_on_count = stop_on_count)
            if  bha[0] > 0 :
                if  show_info :
                    print("bha %s" % ( bha, ))      # single-string form prints the same under Python 2 and 3
                ips[ip] = bha[2]
            elif show_info :
                print("OK %s"  % ( ip,  ))

    return(ips)




def unique_black_hole_list_servers(ips_found_dict) :
    """
        Boil a find_black_holes() result down to the distinct black hole lists it mentions.

        Every value of ips_found_dict is concatenated into one list,
        and that list is returned after going through tzlib.without_dupes().
    """
    servers = [ server for found in ips_found_dict.values() for server in found ]

    return(tzlib.without_dupes(servers))



#
#
#
if __name__ == '__main__' :
    #
    #   Command line driver.
    #
    #       --cutoff N  | -c N      Integer passed to find_black_holes() as stop_on_count.
    #       --since  N              Integer passed to find_black_holes() as since.
    #       --subdirs   | -s        Passed as the second argument to tzlib.ambiguous_file_list().
    #
    #   Every remaining argument is expanded by tzlib.ambiguous_file_list() into the files to scan.
    #
    import  sys

    import  TZCommandLineAtFile

    del sys.argv[0]

    TZCommandLineAtFile.expand_at_sign_command_line_files(sys.argv)

    class   a_thang :
        """ Holds the parsed command line: cutoff, since, do_sub_dirs, and files. """

        def _pop_int_option(me, names, default) :
            """
                Remove every occurrence of the options in names, and the value after each, from sys.argv.

                Returns the last value given, as an int, or default if the option is absent.
                Exits with a message, rather than a traceback, if a value is missing or is not an integer.
            """
            value       = default
            while True :
                oi      = tzlib.array_find(sys.argv, names)
                if  oi < 0 :
                    break
                name    = sys.argv.pop(oi)
                if  oi >= len(sys.argv) :
                    sys.exit("%s must be followed by an integer value" % name)
                text    = sys.argv.pop(oi)
                try :
                    value   = int(text)
                except ValueError :
                    sys.exit("%s must be followed by an integer value, not %r" % ( name, text ))

            return(value)


        def _parse_cmd_line(me) :
            """ Pull the options out of sys.argv, then treat what is left as file specifications. """
            me.cutoff       = me._pop_int_option([ "--cutoff", "-c" ], 0)
            me.since        = me._pop_int_option([ "--since" ],        0)

            me.do_sub_dirs  = False
            while True :
                oi          = tzlib.array_find(sys.argv, [ "--subdirs", "-s" ] )
                if  oi < 0 :
                    break
                del sys.argv[oi]
                me.do_sub_dirs  = True

            me.files        = []
            while len(sys.argv) :
                me.files   += tzlib.ambiguous_file_list(sys.argv.pop(0), me.do_sub_dirs)


        def __init__(me) :
            me._parse_cmd_line()



    me  = a_thang()

    ips = find_black_holes(me.files, stop_on_count = me.cutoff, since = me.since, show_info = True)

    bhs = unique_black_hole_list_servers(ips)

    #   Single-argument print forms give the same output under Python 2 and 3.
    print("Found %d ip addresses in %d ." % ( len(ips), len(bhs) ))
    print(list(ips.keys()))
    print(bhs)



# The export list for "from mailbox_dnsbl import *".
# Python only honors the lower-case spelling, __all__; any other spelling is just an ordinary variable.
__all__ = [
            'find_black_holes',
            'unique_black_hole_list_servers',
          ]

# The old upper-case name, kept so anything that reads it still finds the same list.
__ALL__ = __all__

#
#
#
# eof
