#!/usr/bin/python

# strip_files.py
#       --copyright--                   Copyright 2007 (C) Tranzoa, Co. All rights reserved.    Warranty: You're free and on your own here. This code is not necessarily up-to-date or of public quality.
#       --url--                         http://www.tranzoa.net/tzpython/
#       --email--                       pycode is the name to send to. tranzoa.com is the place to send to.
#       --bodstamps--
#       October 1, 2006         bar
#       November 20, 2006       bar     decomment_cpp_cmt_str_re
#       February 9, 2007        bar     todo
#       February 11, 2007       bar     curly brack and semi-colon stuff
#       February 12, 2007       bar     round and square bracks at end or beginning of line
#                                       unsafe_comment
#       February 15, 2007       bar     colon and slightly more aggressive ridding of spaces in that area
#       March 20, 2007          bar     string version
#                                       .h files
#       March 22, 2007          bar     fix the strip file routine, changing the API for strip_file_to_string()
#       November 18, 2007       bar     turn on doxygen
#       November 20, 2007       bar     comments
#                                       move lf_only() and no_blank_lines() over to tzlib.py, changing lf_only() slightly (\r\r\n is now \n, not \n\n)
#                                       move the whole file read routine over to tzlib.py
#       November 27, 2007       bar     insert boilerplate copyright
#       May 17, 2008            bar     email adr
#       December 19, 2010       bar     expose the string rtns
#       May 27, 2012            bar     doxygen namespace
#       March 3, 2023           bar     python3
#       --eodstamps--
##      \file
#       \namespace              tzpython.strip_files
#
#
#       Strip comments and whitespace out of files
#
#
#       TODO:
#
#
#

from    __future__  import  print_function

import  glob
import  os
import  re
import  sys

import  replace_file
import  TZCommandLineAtFile
import  tzlib


##  Where an output comment (presumably a copyright statement or the like) is placed in a stripped file.
##  The strip_xxx_string() routines compare their comment_location argument against these values.

##  Let the stripper for the file type pick a position it considers safe.
COMMENT_SAFE = 0

##  Force the output comment to the top of the output file.
COMMENT_ON_TOP = 1

##  Force the output comment to the end of the output file.
COMMENT_AT_END = -1


##  Comment removal -------------------------------------------------------------------------

##  Matches a C block comment, /* ... */, which may span lines (shortest match).
decomment_c_str_re = re.compile(r"/\*.*?\*/", flags = re.DOTALL)

##  Matches a // comment that is the only thing on its line (leading white space included).
decomment_cpp_cmt_str_re = re.compile(r"^\s*//.*?$", flags = re.MULTILINE)

##  Matches an SGML/XML/HTML comment, which may span lines (shortest match).
decomment_sgml_str_re = re.compile(r"<!--.*?-->", flags = re.DOTALL)

##  Matches a hash comment that is the only thing on its line (leading white space included).
decomment_hash_str_re = re.compile(r"^\s*#.*?$", flags = re.MULTILINE)


##  Line joining ----------------------------------------------------------------------------

##  Matches a backslash immediately followed by a new line (a continued line).
join_continued_lines_re = re.compile(r"\\\n", flags = re.DOTALL)


##  Matches a left curly bracket with an optional new line on either side.
join_left_curly_brack_re = re.compile(r"\n?\{\n?", flags = re.DOTALL)

##  Matches a right curly bracket with an optional new line on either side.
join_rite_curly_brack_re = re.compile(r"\n?\}\n?", flags = re.DOTALL)

##  Matches optional white space, right-curly-semi-colon, then a new line.
join_rite_curly_brack_semi_re = re.compile(r"\s*\};\n", flags = re.DOTALL)


##  Matches a left round bracket with an optional new line on either side.
join_left_round_brack_re = re.compile(r"\n?\(\n?", flags = re.DOTALL)

##  Matches a right round bracket with an optional new line on either side.
join_rite_round_brack_re = re.compile(r"\n?\)\n?", flags = re.DOTALL)

##  Matches optional white space, right-round-bracket-semi-colon, then a new line.
join_rite_round_brack_semi_re = re.compile(r"\s*\);\n", flags = re.DOTALL)


##  Matches a left square bracket with an optional new line on either side.
join_left_square_brack_re = re.compile(r"\n?\[\n?", flags = re.DOTALL)

##  Matches a right square bracket with an optional new line on either side.
join_rite_square_brack_re = re.compile(r"\n?\]\n?", flags = re.DOTALL)


##  Matches optional white space, right-square-bracket-semi-colon, then a new line.
join_rite_square_brack_semi_re = re.compile(r"\s*\];\n", flags = re.DOTALL)


##  Matches optional white space, a semi-colon, then a new line.
join_semi_eol_re = re.compile(r"\s*;\n", flags = re.DOTALL)

##  Matches optional white space, a colon, then a new line.
join_colon_eol_re = re.compile(r"\s*:\n", flags = re.DOTALL)


##  White space squeezing -------------------------------------------------------------------

##  Matches the white space at the start of any line.
leading_white_space_re = re.compile(r"^\s+", flags = re.MULTILINE)

##  Starting at a line start, captures the shortest run of non-quote characters (group 1)
##  that is followed by two or more white space characters - i.e. white space ahead of any string.
lembedded_white_space_re = re.compile(r"^([^\"\']*?)\s{2,}", flags = re.MULTILINE)


def strip_unquote_white_space(fs):
    """
        Squeeze white space that lies outside of quoted strings.

        Leading white space is removed from every line. Then any run of two or
        more white space characters that comes before the first quote character
        is collapsed to a single space. The same collapse is applied to the
        reversed text so that white space after the last quote character is
        squeezed, too (tzlib.strrev() is presumed to reverse the string).

        Returns the squeezed string.
    """

    def squeeze(text):
        # Re-apply the substitution until a pass no longer changes the length.
        while True:
            squeezed = lembedded_white_space_re.sub(r"\g<1> ", text)
            if not squeezed or len(squeezed) == len(text):
                return text
            text = squeezed

    forward = squeeze(leading_white_space_re.sub("", fs))
    backward = squeeze(tzlib.strrev(forward))

    return tzlib.strrev(backward)


def join_continued_lines(fs):
    """
        Splice continued lines together.

        Every backslash that is immediately followed by a new line is removed
        along with that new line. Returns the resulting string.
    """

    return join_continued_lines_re.sub("", fs)


def join_bracket_lines(fs):
    """
        Pull lines together around brackets, semi-colons and colons.

        No white space is needed next to these characters, so the new lines
        (and, for the semi-colon and colon forms, the white space in front)
        are dropped. Returns the resulting string.
    """

    # The substitutions are applied strictly in this order.
    substitutions = (
        (join_left_curly_brack_re,          "{"),
        (join_rite_curly_brack_re,          "}"),
        (join_rite_curly_brack_semi_re,     "};"),
        (join_left_round_brack_re,          "("),
        (join_rite_round_brack_re,          ")"),
        (join_rite_round_brack_semi_re,     ");"),
        (join_left_square_brack_re,         "["),
        (join_rite_square_brack_re,         "]"),
        (join_rite_square_brack_semi_re,    "];"),
        (join_semi_eol_re,                  ";"),
        (join_colon_eol_re,                 ":"),
    )

    for regx, replacement in substitutions:
        fs = regx.sub(replacement, fs)

    return fs


def strip_curly_string(fs, comment = None, comment_location = COMMENT_SAFE):
    """
        Strip the text of a curly-bracket language file (C, Java, JavaScript, CSS, ...).

        Block comments and whole-line // comments are removed, blank lines are
        dropped (through tzlib.no_blank_lines()), white space outside of strings
        is squeezed, continued lines are joined and lines are pulled together
        around brackets, semi-colons and colons.

        If 'comment' is given it is wrapped as a C block comment. It goes to the
        end of the file only when comment_location is COMMENT_AT_END; otherwise
        (safe or top) it goes to the start.

        Returns the tuple (stripped_text, start_comment, end_comment). At most one
        of the two comments is a string; the other (or both) is None.
    """

    stripped = decomment_cpp_cmt_str_re.sub("", decomment_c_str_re.sub("", fs))

    start_comment = None
    end_comment = None
    if comment:
        if comment_location == COMMENT_AT_END:
            end_comment = "\n\n/* " + comment + " */\n"
        else:
            start_comment = "/* " + comment + " */\n\n"

    stripped = tzlib.no_blank_lines(stripped)
    stripped = strip_unquote_white_space(stripped)
    stripped = join_bracket_lines(join_continued_lines(stripped))

    return stripped, start_comment, end_comment


def strip_angle_string(fs, comment = None, comment_location = COMMENT_SAFE):
    """
        Strip the text of an angle-bracket (SGML-ish: HTML, XML, ...) file.

        SGML comments are removed, blank lines are dropped (through
        tzlib.no_blank_lines()) and white space outside of strings is squeezed.

        If 'comment' is given it is wrapped as an SGML comment. It goes to the
        top of the file only when comment_location is COMMENT_ON_TOP; otherwise
        (safe or end) it goes to the end.

        Returns the tuple (stripped_text, start_comment, end_comment). At most one
        of the two comments is a string; the other (or both) is None.
    """

    stripped = decomment_sgml_str_re.sub("", fs)

    start_comment = None
    end_comment = None
    if comment:
        if comment_location != COMMENT_ON_TOP:
            end_comment = "\n\n<!-- " + comment + " -->\n"
        else:
            start_comment = "<!-- " + comment + " -->\n\n"

    stripped = strip_unquote_white_space(tzlib.no_blank_lines(stripped))

    return stripped, start_comment, end_comment


def strip_make_string(fs, comment = None, comment_location = COMMENT_SAFE):
    """
        Strip the text of a make-style file that uses hash-line comments.

        Only whole-line hash comments are removed. Blank lines and other white
        space are left alone.

        If 'comment' is given it is turned into a hash comment. It goes to the
        end of the file only when comment_location is COMMENT_AT_END; otherwise
        (safe or top) it goes to the start.

        Returns the tuple (stripped_text, start_comment, end_comment). At most one
        of the two comments is a string; the other (or both) is None.
    """

    stripped = decomment_hash_str_re.sub("", fs)

    start_comment = None
    end_comment = None
    if comment:
        if comment_location == COMMENT_AT_END:
            end_comment = "\n\n# " + comment + "\n"
        else:
            start_comment = "# " + comment + "\n\n"

    return stripped, start_comment, end_comment


def strip_conf_string(fs, comment = None, comment_location = COMMENT_SAFE) :
    """
        Strip the text of a .properties / .conf style file.

        Whole-line hash comments are removed and the output comment is built by
        strip_make_string(). Blank lines are then dropped (through
        tzlib.no_blank_lines()) and white space around an equal sign is
        normalized to exactly one space on each side.

        Only an equal sign that already has white space on both sides, on the
        same line, is touched. "a=b" is left as it is.

        Returns the tuple (stripped_text, start_comment, end_comment).
    """

    ( fs, start_comment, end_comment )  = strip_make_string(fs, comment = comment, comment_location = comment_location)

    fs  = tzlib.no_blank_lines(fs)

    #   Match white space other than a new line. A plain \s+ would also swallow the
    #   new line after an empty value ("key =\nnext = 1") and glue two settings together.
    fs  = re.sub(r"[^\S\n]+=[^\S\n]+", " = ", fs)

    return(fs, start_comment, end_comment)


def strip_file_to_string(fname, comment = None, comment_location = COMMENT_SAFE):
    """
        Read the given file and strip it to a string.

        The stripper is picked from the file name's extension (case ignored):

            .c .h .cpp .java .js .css           strip_curly_string()
            .htm .html .xml .dtd .xul .rdf      strip_angle_string()
            .mak                                strip_make_string()
            .properties .conf                   strip_conf_string()

        Returns the list [ stripped_text, start_comment, end_comment ].
        All three are None if the extension is not one of those above, in
        which case the file is not read.
    """

    lowered = fname.lower()

    if lowered.endswith((".c", ".h", ".cpp", ".java", ".js", ".css")):
        stripper = strip_curly_string
    elif lowered.endswith((".htm", ".html", ".xml", ".dtd", ".xul", ".rdf")):
        stripper = strip_angle_string
    elif lowered.endswith(".mak"):
        stripper = strip_make_string
    elif lowered.endswith((".properties", ".conf")):
        stripper = strip_conf_string
    else:
        return [None, None, None]

    text = tzlib.read_whole_text_file(fname)
    text, start_comment, end_comment = stripper(text, comment = comment, comment_location = comment_location)

    return [text, start_comment, end_comment]


def strip_file(fname, ofile_name = None, comment = None, comment_location = COMMENT_SAFE) :
    """
        Strip the given file, outputting the results to the given output file (the input file, if none is given).

        The stripped text, with any start/end comment attached, is first written
        to ofile_name + ".tmp". That file is then handed to
        replace_file.replace_file() along with ofile_name and ofile_name + ".bak"
        (presumably swapping the new file in and keeping the old one as a backup).

        Returns False, writing nothing, if strip_file_to_string() does not handle
        this kind of file. Returns True otherwise.
    """

    if  not ofile_name :
        ofile_name  = fname

    ( fs, start_comment, end_comment )  = strip_file_to_string(fname, comment, comment_location)

    if  fs is None :
        return(False)

    if  start_comment :
        fs  = start_comment + fs

    if  end_comment :
        fs += end_comment

    tname   = ofile_name + ".tmp"

    #   The 'with' closes the temp file even if the write raises.
    with open(tname, "w") as fo :
        fo.write(fs)

    replace_file.replace_file(ofile_name, tname, ofile_name + ".bak")

    return(True)


def _do_help():
    """
        Print the command line usage text, then exit the program with status 255.
    """

    usage = """
python strip_files (options) ambiguious_file(s)

    Strips comments from given file(s), replacing the file.

Options:

    --subdirs       -s              Do all matching files in subdirectories, too.
    --comment       -c  comment     Insert the given comment.
    --start_comment -t              Force the comment insertion to the top of the file.
    --end_comment   -e              Force the comment insertion to the end of the file.
    --safe_comment                  Insert comment in safe position (default).

"""

    print(usage)
    sys.exit(255)


class a_thang :
    """
        Class for command line use.

        Constructing one parses (and consumes) the options in sys.argv, leaving
        only the file/directory arguments there.

        Attributes:
            do_subdirs          True if --subdirs / -s was given.
            comment             Text given with --comment / -c, or None.
            comment_location    One of COMMENT_SAFE, COMMENT_ON_TOP, COMMENT_AT_END.
            base_amb_name       File name pattern used by do_dir(); "" means all files.
    """


    def _take_flag(me, names) :
        """
            Remove every occurrence of the given option spellings from sys.argv.

            tzlib.array_find() is presumed to return the index of an element of
            sys.argv that is in 'names', or a negative number if there is none.

            Returns True if at least one occurrence was removed.
        """

        found   = False
        while True :
            oi  = tzlib.array_find(sys.argv, names)
            if  oi < 0 :
                return(found)
            del sys.argv[oi]
            found   = True
        pass


    def _parse_cmd_line(me) :
        """
            Parse command line parameters

            The program name and all recognized options are removed from sys.argv.
            Asking for help, or giving --comment / -c without any text after it,
            prints the usage text and exits (see _do_help()).

            The comment location options are handled in a fixed order (top, end,
            safe), so when several are given the last one handled wins, whatever
            their order on the command line.
        """


        del sys.argv[0]

        TZCommandLineAtFile.expand_at_sign_command_line_files(sys.argv)


        if  me._take_flag( [ "--help", "-h", "-?" ] ) :
            _do_help()


        if  me._take_flag( [ "--subdirs", "-s" ] ) :
            me.do_subdirs       =   True

        while True :
            oi  = tzlib.array_find(sys.argv, [ "--comment", "-c" ] )
            if  oi < 0 :    break
            del sys.argv[oi]
            if  oi >= len(sys.argv) :
                #   The option was the last argument, so there is no comment text to take.
                #   Tell the human rather than dying with an IndexError.
                print("Missing comment text after --comment / -c", file = sys.stderr)
                _do_help()
            me.comment          =   sys.argv.pop(oi)

        if  me._take_flag( [ "--start_comment", "-t" ] ) :
            me.comment_location =   COMMENT_ON_TOP

        if  me._take_flag( [ "--end_comment", "-e" ] ) :
            me.comment_location =   COMMENT_AT_END

        if  me._take_flag( [ "--safe_comment" ] ) :
            me.comment_location =   COMMENT_SAFE

        pass


    def __init__(me) :
        """ Constructor. Sets the defaults, then applies the command line options. """
        me.do_subdirs       =   False
        me.comment          =   None
        me.comment_location =   COMMENT_SAFE

        me.base_amb_name    =   ""

        me._parse_cmd_line()


    def do_a_file(me, fname) :
        """ Strip the one given file in place, using the comment settings from the command line. """
        strip_file(fname, comment = me.comment, comment_location = me.comment_location)


    pass            # a_thang


def do_em(me, s):
    """
        Strip the file(s) given by the ambiguous (wildcard) name.

        The name is expanded with glob.glob() and me.do_a_file() is called once
        for each matching path.
    """

    matches = glob.glob(s)
    for path in matches:
        me.do_a_file(path)


def do_dir(me, dir_name, names):
    """
        Strip the files in the given directory.

        The files are those matching me.base_amb_name, or every file ("*") if
        that name is empty. 'names' is unused; it is only there so the routine
        fits the directory-walk callback signature.
    """

    pattern = me.base_amb_name or "*"
    do_em(me, os.path.abspath(os.path.join(dir_name, pattern)))


#
#       Command line entry point.
#
#       Each argument left after option parsing is a wildcard file name or a directory:
#
#           without --subdirs   a directory means every file in it, anything else is globbed
#           with    --subdirs   the directory (or the directory part of a wildcard name) is
#                               walked, and the files matching the name part are stripped in
#                               every directory under it
#
if  __name__ == '__main__' :

    me = a_thang()

    if  len(sys.argv) < 1 :
        _do_help()

    for s in sys.argv :

        if  s[0:1] == "-" :
            #   Looks like an option that was not recognized. Warn loudly, but still treat it as a name.
            for _blank in range(5) :
                print('')
            print(  "Did you mean this to be a file or directory:", s)
            for _blank in range(5) :
                print('')

        if me.do_subdirs :
            if  not os.path.isdir(s) :
                #   Split "dir/pattern" so the pattern can be applied in every sub-directory.
                #   NOTE: base_amb_name stays set for any later directory arguments.
                ( s, me.base_amb_name ) = os.path.split(os.path.normpath(s))
                if  s == "" :   s = "."

            if  not os.path.isdir(s) :
                do_dir(me, s, [] )
            else :
                #   os.path.walk() is gone in Python 3. os.walk() visits the same directories, top-down.
                for ( dir_name, _sub_dirs, names ) in os.walk(s) :
                    do_dir(me, dir_name, names)
                pass
            pass

        elif    not os.path.isdir(s) :
            do_em(me, s)

        else :
            do_dir(me, s, [])

        pass

    pass


#
#
# eof