#!/usr/bin/python

# tz_pwi.py
#       --copyright--                   Copyright 2009 (C) Tranzoa, Co. All rights reserved.    Warranty: You're free and on your own here. This code is not necessarily up-to-date or of public quality.
#       --url--                         http://www.tranzoa.net/tzpython/
#       --email--                       pycode is the name to send to. tranzoa.com is the place to send to.
#       --bodstamps--
#       June 5, 2009            bar
#       June 6, 2009            bar     be more robust in the face of whatever
#       --eodstamps--
##      \file
#
#
#       Do things with WinMobile Notes' .pwi files.
#
#       Command line:
#
#           input_pwi_file output_txt_file
#
#       This thing doesn't really work all that well for anything but text files.
#           That is, images, drawings and web links really mess things up.
#
#           Better is to use tz_word_doc.py if possible. That script requires Word 2007, though.
#           And for pwi files, ActiveSync must have been installed, apparently, as it adds an input filter for pwi files to Word.
#
#

import  re


#   Header that precedes a run of text in the file data.
#       group(1) is a two-character, low-byte-first length field.
#       group(2) is everything after the header (the text starts there).
#   The pattern itself is pure ASCII (the binary values are regex escapes),
#   so it needs no decoding and matches latin1-decoded data directly.
txt_hdr_re  = re.compile(r"A\0....(..)(?:...)?.....................\000\xe6\n\0(.*)", re.DOTALL)


def _utf8_prefix(span):
    """ Decode the longest leading part of span that is valid UTF-8.

        span is text in which each character stands for one raw byte
        (i.e. latin1-decoded data).

        Returns (text, consumed), where consumed is the number of characters
        of span that were turned into text.
    """

    try:
        raw = span.encode('latin1')
    except UnicodeEncodeError as err:
        #   A character above 0xFF cannot be a raw byte. Stop in front of it.
        raw = span[:err.start].encode('latin1')

    try:
        return raw.decode('utf8'), len(raw)
    except UnicodeDecodeError as err:
        #   err.start is the offset of the first byte that could not be decoded.
        #   Everything before it is known to be good (this also covers a
        #   multi-byte character chopped off at the end of the span).
        return raw[:err.start].decode('utf8'), err.start


def extract_text(fd):
    """ Extract text from a .pwi file's data, returning an array of paragraphs.

        fd is the whole file's data, either as raw bytes or already decoded
        as latin1 (one character per byte).

        Each header found by txt_hdr_re carries a length field. The length,
        less one, is taken as the number of UTF-8 bytes of text that follow
        the header. If those bytes are not all valid UTF-8, the paragraph ends
        at the first bad byte and the search for the next header resumes there.

        Returns a list of unicode strings, one per header found
        (an empty list if there are no headers).
    """

    if isinstance(fd, bytes):
        #   Work on one-character-per-byte text so the regex and slicing below behave the same for both input kinds.
        fd = fd.decode('latin1')

    paras = []
    pos = 0
    while True:
        #   Searching from an offset avoids re-copying the rest of the file for every paragraph.
        g = txt_hdr_re.search(fd, pos)
        if not g:
            break

        lo, hi = g.group(1)

        #   A zero length field would give -1 here. Clamp it so it yields an empty paragraph rather than a runaway loop or a bogus slice.
        ei = max(ord(lo) + (ord(hi) * 256) - 1, 0)

        start = g.start(2)
        txt, used = _utf8_prefix(fd[start:start + ei])
        paras.append(txt)

        #   start is always past the header, so pos strictly grows and the loop ends.
        pos = start + used

    return paras


#
#
#   Test code.
#
#
if __name__ == '__main__' :
    #   Command line driver:   input_pwi_file output_txt_file
    import  sys


    import  replace_file
    import  TZCommandLineAtFile
    import  tzlib

    program_name    = sys.argv.pop(0)

    TZCommandLineAtFile.expand_at_sign_command_line_files(sys.argv)

    #   Exactly two arguments are wanted: the input file name and the output file name.
    #   (The single-argument print(...) form prints the same text whether print is a statement or a function.)
    if  len(sys.argv) != 2 :
        print("Tell me input .pwi file and an output .txt file name!")
        sys.exit(101)

    fin = sys.argv[0]
    fon = sys.argv[1]


    #   latin1 gives one character per byte of the file, which is what extract_text() is handed.
    fd  = tzlib.read_whole_binary_file(fin).decode('latin1')

    pa  = extract_text(fd)

    if  not pa :
        print("No text found in %s!" % fin)
        sys.exit(101)

    #   Write to a temporary file first. The real output file is only touched by replace_file() below.
    tfn = fon + ".tmp"

    #   'with' makes sure the file is closed even if a write fails part way through.
    with open(tfn, "w") as fo :
        for p in pa :
            fo.write(p.encode('utf8'))
            fo.write("\n")

    replace_file.replace_file(fon, tfn, fon + ".bak")


#
#
#
# eof

