#!/usr/bin/python
# -*- coding: utf8 -*-

# tzemail.py
#       --copyright--                   Copyright 2007 (C) Tranzoa, Co. All rights reserved.    Warranty: You're free and on your own here. This code is not necessarily up-to-date or of public quality.
#       --url--                         http://www.tranzoa.net/tzpython/
#       --email--                       pycode is the name to send to. tranzoa.com is the place to send to.
#       --bodstamps--
#       August 30, 2005         bar
#       October 19, 2006        bar     expose the re's
#       October 27, 2006        bar     dnsbl_count
#       October 31, 2006        bar     fix a comment
#       May 28, 2007            bar     email_adr_re
#       November 18, 2007       bar     turn on doxygen
#       November 27, 2007       bar     insert boilerplate copyright
#       May 17, 2008            bar     email adr
#       August 29, 2008         bar     basestring instead of StringType because of unicode strings and others
#       May 18, 2009            bar     show_info for dnsbl spam check
#       May 23, 2009            bar     demote sorbs from the dnsrbl list (poisoned? google's server is a spammer and ohio seems to have a problem with them, too)
#       November 29, 2011       bar     pyflake cleanup
#       May 27, 2012            bar     doxygen namespace
#       June 2, 2013            bar     mail_files()
#       August 20, 2013         bar     mail_files sets the file name as the base file name, not the whole path
#       September 24, 2013      bar     file_name_filter to mail_files()
#       November 27, 2013       bar     subject is only the base file name
#       April 5, 2014           bar     mail_files() can take a list
#       May 27, 2014            bar     mail_files() can take text
#       April 10, 2016          bar     mail_files() sends only one email for all the files
#       --eodstamps--
##      \file
#       \namespace              tzpython.tzemail
#
#
#       Various email routines, which really shouldn't be used, but they are quick and easy.
#
#       Regex to pick off well-formed email addresses in text:
#
#           \b[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b
#
#

import  copy
import  email.mime.application
import  email.mime.text
import  email.mime.multipart
import  os
import  re
import  smtplib
import  socket
import  string
from    types   import  FileType, ListType, TupleType

import  tzlib
import  tz_parse_time


# Matches the run of CR/LF characters that ends a line (used to chop EOLs off lines as they are read).
strip_eol_re    =   re.compile(r"[\r\n]+$", re.DOTALL | re.MULTILINE)

# Matches an mbox "From ..." separator line: starts with "From " and has a 19xx/20xx year somewhere after.
from_re         =   re.compile(r"^From .*\b(19|20)\d\d\b")

# Matches the first whitespace-delimited token of a line plus the white space after it (e.g. "Subject: ").
drop_hdr_re     =   re.compile(r"^\S+\s+")

# Picks well-formed email addresses out of text, case-insensitively. The final label must be 2 to 4 letters long.
email_adr_re    =   re.compile(r"\b[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b", re.IGNORECASE)



def read_mbox_file(fi) :
    """
        Read emails from a Unix/Netscape mbox/mailbox file.
        'fi' is either:
            an open, binary file (or any object with a readline() method)
            a file name of an existing file

        Return an array of emails.

            Each email is an array of text lines without EOL characters.
            The first text line of each email will be the "From ..." line,
            unless the file is simply one email without a "From ..." line.

        A file opened here is always closed again, even if reading fails.
        A file object passed in by the caller is left open.
    """

    opened_here = not hasattr(fi, "readline")       # anything that cannot be read from is taken to be a file name
    fr          = open(fi, "rb") if opened_here else fi     # binary 'cause of Control Z's

    emails  = []
    emale   = []

    try :
        while True :
            li  = fr.readline()
            if  not li :
                break

            li  = strip_eol_re.sub("", li)

            if  from_re.match(li) and emale :       # a "From ..." line starts the next email, so bank the one in progress
                emails.append(emale)
                emale   = []
            emale.append(li)
    finally :
        if  opened_here :
            fr.close()

    if  emale :
        emails.append(emale)

    return(emails)



def from_time(li) :
    """
        Hand the line (e.g. "From - Fri Oct 27 16:57:16 2006", the first line
        of an email in a mailbox file) to tz_parse_time.parse_time() and return
        what it yields.

        Return -1 if the parse result is false-ish (no time found).
    """

    return(tz_parse_time.parse_time(li) or -1)





def read_email_from_file(fi) :
    """
        Read an email from a file (e.g. sys.stdin) in to an array of text lines lacking EOL characters.

        'fi' is either:
            an open, binary file (or any object with a readline() method)
            a file name of an existing file

        A file opened here is always closed again, even if reading fails.
        A file object passed in by the caller is left open.
    """

    opened_here = not hasattr(fi, "readline")       # anything that cannot be read from is taken to be a file name
    fr          = open(fi, "rb") if opened_here else fi     # binary 'cause of Control Z's

    emale   = []
    try :
        while True :
            li  = fr.readline()
            if  not li :
                break

            emale.append(strip_eol_re.sub("", li))
    finally :
        if  opened_here :
            fr.close()

    return(emale)


def get_flattened_email_headers_array(emale) :
    """
        Given an array of text lines lacking EOL characters in blank-line-separates-header-from-body form,
        return an array with multi-line headers combined in to single lines.

        Only the lines before the first empty line are looked at.
        A line starting with a space or tab is appended, as is, to the header line before it.
        (A continuation line with no header before it is kept as a line of its own.)

        Always returns a list - an empty one if there are no header lines.
    """

    hdrs    = []
    for li  in emale :
        if  len(li) == 0 :                          # blank line: end of the headers
            break

        if  hdrs and (li[0:1] in ( ' ', '\t' )) :
            hdrs[-1]   += li
        else :
            hdrs.append(li)

    return(hdrs)



def get_flattened_email_headers_str(emale) :
    """
        Given an array of text lines lacking EOL characters in blank-line-separates-header-from-body form,
        return a string with multi-line headers combined in to single lines.

        Each header in the returned string is followed by a LF.
        Return "" if there are no headers.
    """

    hdrs    = get_flattened_email_headers_array(emale)
    if  not hdrs :
        return("")

    return("\n".join(hdrs) + "\n")                  # str.join rather than the deprecated string.join()


def get_flattened_email_headers(emale) :
    """
        Another name for get_flattened_email_headers_str():

        Takes an array of text lines lacking EOL characters in blank-line-separates-header-from-body form
        and gives back whatever get_flattened_email_headers_str() returns for it.
    """

    flattened   = get_flattened_email_headers_str(emale)

    return(flattened)


def get_email_body_string(emale) :
    """
        Given an array of text lines lacking EOL characters in blank-line-separates-header-from-body form,
        return a string containing the body of the email, LF line delimited.

        The body is everything after the first empty line.
        Return "" if there is no empty line or if nothing follows it.
    """

    try :
        i   = emale.index("") + 1
    except ValueError :                             # no blank line: the whole thing is headers
        return("")

    body    = emale[i:]
    if  not body :
        return("")

    return("\n".join(body) + "\n")                  # str.join rather than the deprecated string.join()




def add_email_header(emale, hdr) :
    """
        Insert the header line 'hdr' in to an email (an array of text lines lacking EOL characters), in place.

        The line goes just in front of the first empty line,
        or at the very end if the email has no empty line.
    """

    where   = len(emale)                            # default: tack it on the end
    for idx, line in enumerate(emale) :
        if  len(line) == 0 :
            where   = idx
            break

    emale.insert(where, hdr)



def _fixed_header_name(hdr) :
    """
        Normalize a header name for prefix matching against lower-cased header lines.

        The name is lower-cased and stripped of surrounding white space.
        If it does not then end in a colon (optionally followed by white space), ": " is appended.
    """

    name    = hdr.lower().strip()
    if  re.search(r":\s*$", name) :
        return(name)

    return(name + ": ")



def eliminate_email_header(emale, hdr) :
    """
        Delete, in place, every header of the given name from the email.
            'hdr' is case insensitive, optionally including the colon:space at the end.
            'emale' is an array of text lines lacking EOL characters.

        Only lines before the first empty line are considered.
        The continuation lines (those starting with a space or tab) that follow a deleted header go with it.
    """

    wanted  = _fixed_header_name(hdr)

    at      = 0
    while (at < len(emale)) and (len(emale[at]) != 0) :
        if  emale[at].lower().startswith(wanted) :
            del(emale[at])                          # the header line itself
            while (at < len(emale)) and (emale[at][0:1] in ( ' ', '\t' )) :
                del(emale[at])                      # and each folded continuation line after it
        else :
            at += 1



def find_email_headers(emale, hdr) :
    """
        Return an array of the values of the flattened headers whose names match the desired header name.
            'hdr' is case insensitive, optionally including the colon:space at the end.
            'emale' is an array of text lines lacking EOL characters.

        Each returned string is the flattened header line with its first white-space-delimited token
        (normally the "Name:") removed, and with surrounding white space stripped.
    """

    wanted      = _fixed_header_name(hdr)
    flattened   = get_flattened_email_headers_array(emale)

    return([ drop_hdr_re.sub(r"", line).strip() for line in flattened if line.lower().startswith(wanted) ])



#
#
#       Big list of 'em at http://www.robtex.com/rbls/127.0.0.2.html
#       And, another at http://shopping.declude.com/Articles.asp?ID=97
#       And, another at http://www.dnsstuff.com/tools/ip4r.ch?ip=
#
#

# Default DNSBL hosts for dnsbl_count(). Each entry is [ host_name, count ],
# 'count' being how much a hit on that host adds to the hit count.
black_hole_list = [
    [ 'sbl-xbl.spamhaus.org',       5 ],
    [ 'bl.spamcop.net',             5 ],
    [ 'spam.dnsbl.sorbs.net',       1 ],
    [ 'spam.dnsrbl.net',            5 ],
    [ 'dnsbl-3.uceprotect.net',     1 ],
    [ 'korea.services.net',         1 ],
    [ 'no-more-funn.moensted.dk',   1 ],
    [ 'dnsbl.jammconsulting.com',   1 ],
    [ 'blocklist2.squawk.com',      1 ],
    [ 'psbl.surriel.com',           1 ],
    [ 'phishing.rbl.msrbl.net',     1 ],
    [ 'map.spam-rbl.com',           1 ],
    [ 'rbl.efnet.org',              1 ],
    [ 'list.dsbl.org',              1 ],
    [ 'multihop.dsbl.org',          2 ],
    [ 'xbl.spamhaus.org',           1 ],
]


#
#
#   dnsbl_count() can be called with these instead of with a black hole list.
#
#

# Same [ host_name, count ] layout as a DNSBL list, but naming a white-listing service.
white_list      = [ [ 'query.bondedsender.org', 1 ], ]



have_adns   = True                          # cleared by dnsbl_count() once the 'adnshost' program proves unusable, so later calls go straight to socket lookups


def dnsbl_count(ip_or_name, dnsbl_names   = black_hole_list, stop_on_count = 0, show_info = False ) :
    """
        Look up an IP address or name in DNSBL listings.

        'ip_or_name'    is resolved with socket.gethostbyname(). The reversed dotted quad is
                        put in front of each DNSBL host name to form the name that is queried.
        'dnsbl_names'   is one DNSBL host name, or a list/tuple of them. Each entry may be a bare
                        host name (count of 1) or [ host_name, count ]. None/empty means black_hole_list.
        'stop_on_count' if positive, ends the one-at-a-time socket lookups as soon as the hit count
                        reaches it. It has no effect when all the names are looked up in one 'adnshost' run.
        'show_info'     prints the queried names and the hits.

        Return ( hit_counts, total_count, [ hitted_dnsbl_name... ] )

        Return -1 if 'ip_or_name' cannot be resolved.

        All the names are first given to one run of the 'adnshost' program. If that program cannot be run,
        or prints nothing, or its output does not mention the first DNSBL host name, the module-level
        'have_adns' flag is cleared and the names are resolved one at a time with socket.gethostbyname().
    """

    global  have_adns

    import  subprocess                                  # local import: only this routine runs an external program


    stop_on_count   = stop_on_count or 0


    if  not dnsbl_names :
        dnsbl_names = black_hole_list
    if  not isinstance(dnsbl_names, ( list, tuple )) :
        dnsbl_names = [ dnsbl_names ]                   # a lone host name

    entries         = []                                # normalized to ( host_name, count ) pairs
    for dn in dnsbl_names :
        if  not isinstance(dn, ( list, tuple )) :
            dn      = [ dn, 1 ]
        entries.append(( dn[0], dn[1] ))


    try :
        ip      = socket.gethostbyname(ip_or_name)
    except socket.gaierror :
        return(-1)                                      # can't find the name or IP adr under question

    rip     = ".".join(reversed(ip.split('.'))) + "."   # 1.2.3.4 is queried as 4.3.2.1.dnsbl_host

    cnt     = 0
    tcnt    = 0
    hits    = []

    if  have_adns :
        queries = [ rip + name for ( name, weight ) in entries ]

        if  show_info :
            print(" " + " ".join(queries))

        try :
            # An argument list, not a shell command line, so odd characters in a caller's host names cannot reach a shell.
            p   = subprocess.Popen([ "adnshost", "-a", "-ta" ] + queries,
                                   stdin                = subprocess.PIPE,
                                   stdout               = subprocess.PIPE,
                                   stderr               = subprocess.STDOUT,
                                   universal_newlines   = True,
                                  )
            rr  = p.communicate()[0] or ""
        except OSError :                                # adnshost is not installed or cannot be started
            rr  = ""

        if  (not rr) or (rr.find(entries[0][0]) < 0) :
            have_adns       = False                     # no usable output: fall through to the socket lookups below
        else :
            for ( name, weight ) in entries :
                tcnt       += weight
                w           = rip + name
                if  re.search(r"\b" + re.escape(w) + r"\s+A\s+127\.0\.0\.\d+\b", rr) :
                    if  show_info :
                        print("hit " + w)

                    cnt    += weight
                    hits.append(name)

    if  not have_adns :
        for ( name, weight ) in entries :
            tcnt       += weight
            w           = rip + name

            if  show_info :
                print(w)

            try :
                answer  = socket.gethostbyname(w)
            except socket.gaierror :                    # the name is not in this list (or the list cannot be reached)
                continue

            if  answer.startswith("127.0.0.") :
                if  show_info :
                    print("hit " + w)

                cnt    += weight
                hits.append(name)
                if  (stop_on_count > 0) and (cnt >= stop_on_count) :
                    break

    return( ( cnt, tcnt, hits ) )           # tell caller how bad things are, how bad things could be, and what lists had the address



def mail_files(amb_file_name, from_adr, to_adr, subject = None, text = None, encoding = None, file_name_filter = None) :     # 0244 Ʉ   00a9 (copyright) ©     0110 D stroke Đ    0692 arabic reh with small v ڒ     3026 hangzhou number six 〦
    """
        Mail files matching the given ambiguous name or mail the files listed. Return a list of the sent files' names.

        'amb_file_name'     is a string handed to tzlib.ambiguous_file_list(), or an iterable of file names.
        'from_adr'          is the sender's address.
        'to_adr'            is the recipient address. If it holds more than one comma separated address, each one is a recipient.
        'subject'           is the subject. If empty, the first non-blank line of 'text' is used.
                            If that's empty, too, the base name of the first file is used, plus one "+" for each extra file.
        'text'              is optional text, sent as a utf8 text/plain part in front of the files.
        'encoding'          if given, each file is sent as text/plain in that character set.
                            Otherwise a file with anything but CR, LF, TAB and printable ASCII in it is sent as an application attachment.
        'file_name_filter'  if given, is called with each file name. A true-ish result replaces the name used for the attachment.

        All the files and the text go in one email. Nothing is sent if there are neither files nor text.
        The message is completely built before the SMTP server (smtplib's default local one) is connected to,
        and the connection is closed even if the send fails.
    """

    if  isinstance(amb_file_name, basestring) :
        fns = tzlib.ambiguous_file_list(amb_file_name)
    else    :
        fns = list(amb_file_name)           # he passed a list of files

    if  not (len(fns) or text) :
        return(fns)

    msg     = email.mime.multipart.MIMEMultipart()
    if  text :
        text    = tzlib.convert_to_unicode(text).encode('utf8')
        msg.attach(email.mime.text.MIMEText(text, 'plain', 'utf8'))

    ffn     = ""                            # name (after filtering) of the first file, for the fall-back subject
    for fn in fns :
        fd      = tzlib.read_whole_binary_file(fn)
        if  (not encoding) and re.search(r"[^\r\n\t -\~]", fd) :                # if he's not telling us an encoding and if anything but normal ASCII text is in the file
            m   = email.mime.application.MIMEApplication(fd)                    # send the file
        else    :
            m   = email.mime.text.MIMEText(fd, 'plain', encoding or 'utf8')     # otherwise send text
        fn      = (file_name_filter and file_name_filter(fn)) or fn
        ffn     = ffn or fn
        m.add_header('Content-Disposition', 'attachment', filename = os.path.basename(fn))
        msg.attach(m)

    subj    = tzlib.best_ascii(subject or "")
    if  not subj :
        first   = re.search(r"[^\r\n]+", tzlib.best_ascii(text or ""))          # only the first non-blank line: a header must not contain line breaks
        subj    = (first and first.group(0)) or ""
    if  not subj :
        subj    = tzlib.best_ascii(os.path.basename(ffn))
        if  len(fns) > 1 :
            subj   += " " + ("+" * (len(fns) - 1))

    msg['Subject']  = subj
    msg['From']     = from_adr
    msg['To']       = to_adr

    ta      = [ to_adr ]
    sa      = [ a for a in to_adr.split(',') if a.strip() ]
    if  len(sa) > 1 :
        ta  = sa

    s       = smtplib.SMTP()
    s.connect()
    try :
        s.sendmail(from_adr, ta, msg.as_string())
        s.quit()
    finally :
        s.close()                           # harmless after a good quit(), and frees the socket if the send blew up

    return(fns)



#
#
#
if __name__ == '__main__' :

    #
    #   Command line use:
    #
    #       --send [--encoding ENCODING] file_name [text]    mail the file(s) and any text
    #       existing_file_name                                analyse the file as an mbox file, then read an email from stdin
    #       ip_or_host_name                                   look the address up in the black hole and white lists
    #

    import  sys

    show_info   = False

    if  len(sys.argv) > 1 :
        if  sys.argv[1] in [ '--send', '-s' ] :
            i   = 2
            enc = None
            if  (len(sys.argv) > i) and (sys.argv[i] in [ '--encoding', '-e' ]) :
                i  += 1
                if  len(sys.argv) > i :
                    enc = sys.argv[i]
                i  += 1
            if  len(sys.argv) <= i :
                sys.exit("Please give --send a file name.")         # rather than dying with an IndexError
            fns = mail_files(sys.argv[i], "tzemail_py@tranzoa.com", "test@tranzoa.com", text = ((len(sys.argv) > i + 1) and sys.argv[i + 1]) or None, encoding = enc)       # send the (double quoted on unix) files and any text that follows after the file and optional --encoding ENCODING option
            print("Mailed: " + str(fns))
        elif os.path.isfile(sys.argv[1]) :                          # analyse an email file
            emails = read_mbox_file(sys.argv[1])
            print(str(len(emails)) + " emails.")
            if  len(emails) :
                for li in emails[0][:2] :                           # first two lines of the first email - there may be only one
                    print(li)

            emale = read_email_from_file(sys.stdin)                 # uh, also read an email from stdin? weird.
            print("Email is " + str(len(emale)) + " lines long.")
            for li in emale[:2] :                                   # first two lines, if there are that many
                print(li)
        else :                                                      # look up the given ip adr in the black hole lists (this is slow)
            print("Black hole "   + str(dnsbl_count(sys.argv[1],                show_info = show_info)))
            print("White listed " + str(dnsbl_count(sys.argv[1], white_list,    show_info = show_info)))



#
#
# eof
