from appscript import *
# Imported after the star-import so these names cannot be shadowed by it.
import datetime
import re

# Table of (strptime format, regex) pairs describing the date notations that
# are recognised.  The regex locates candidate text; the format is what
# extract_date later hands to strptime for that candidate.
#
# Entry order is significant: RE_DATE joins the regexes in this order, one
# capture group per entry, and extract_date maps the number of the group that
# matched back to the entry at index (group - 1).  For the same reason the
# regexes here must not contain capturing groups of their own (use (?:...)).
#
# Entries marked "bad OCR" accept a '1' where a '/' is expected, or extra
# spaces between the characters of the date.  The trailing comments
# presumably name the kind of document each notation was seen in.
DATE_FORMATS = (('%m/%d/%y',  r'\d{1,2}/\d{1,2}/\d{1,2}'       ), # T-Mobile
                ('%m.%d.%y',  r'\d{1,2}\.\d{1,2}\.\d{1,2}'     ), # iFixit
                ('%b %d, %Y', r'[A-Z][a-z][a-z] \d{1,2}, \d{4}'), # AmerenIP
                ('%B %d, %Y', r'[A-Z][a-z]+  ?\d{1,2}, ?\d{4}' ), # Amazon
                ('of %Y%m%d', r'of \d{8}'                      ), # Amazon
                ('%m/%d/%Y',  r'\d{1,2}/\d{1,2}/\d{4}'         ), # Busey
                ('%b %d %Y',  r'[A-Z]{3} \d{1,2} \d{4}'        ), # State Farm
                ('%d %b %Y',  r'\d{1,2} [A-Z][A-Za-z]{2} \d{4}'), # Apple
                ('%Y-%m-%d',  r'\d{4}-\d{2}-\d{2}'             ), # MacSpeech
                ('%Y-%m',     r'\d{4}-\d{2}'                   ), # filename
                ('%m1%d/%y',  r'\d{1,2}1\d{1,2}/\d{1,2}'       ), # T-Mo bad OCR
                ('%m/%d1%y',  r'\d{1,2}/\d{1,2}1\d{1,2}'       ), # T-Mo bad OCR
                ('%m/%d/%y',  r'\d{1,2}/ \d{1,2}/ \d{1,2}'     ), # T-Mo bad OCR
                ('%m/%d/%Y',
                 r'(?:\d ?){1,2}/ (?:\d ?){1,2}/ (?:\d ?){4}'  ), # Busey bad OCR
                )

# Single pattern that tries every DATE_FORMATS regex in table order.  Each
# alternative is anchored on word boundaries and wrapped in its own capture
# group, so capture group N belongs to DATE_FORMATS[N - 1].
RE_DATE = re.compile(
    '|'.join(r'(\b' + entry[1] + r'\b)' for entry in DATE_FORMATS))

def extract_date(contents, match=None):
    """Return the first acceptable date found in *contents*.

    Every RE_DATE hit is parsed with the strptime format of the DATE_FORMATS
    entry whose capture group matched.  The first hit that parses (and, when
    *match* is given, falls in the same year and month as *match*) wins.

    Args:
        contents: text to search.  None or an empty string yields no date.
        match: optional object with ``year`` and ``month`` attributes used
            as a hint; when given, parsed dates from any other year/month
            are rejected.

    Returns:
        A ``(date, no_match)`` tuple.  ``date`` is a ``datetime.date`` or
        None when nothing acceptable was found.  ``no_match`` lists the
        candidates rejected before returning, in the order seen: a
        ``(text, format, ValueError)`` tuple for text that failed to parse,
        or the raw matched text for a valid date in the wrong year/month.
    """
    no_match = []
    if not contents:
        return None, no_match
    for m in RE_DATE.finditer(contents):
        group = m.lastindex
        fmt = DATE_FORMATS[group - 1][0]
        raw = m.group(group)
        # Spaces are dropped from the candidate so that OCR output such as
        # "0 1/ 0 5/ 2 0 1 0" parses.  They must be dropped from the format
        # as well: strptime on current Python versions requires at least one
        # whitespace character wherever the format contains whitespace, so a
        # format that still had spaces would never match the stripped text.
        candidate = raw.replace(' ', '')
        try:
            parsed = datetime.datetime.strptime(candidate,
                                                fmt.replace(' ', ''))
        except ValueError as e:  # looked like a date, but is not one
            no_match.append((candidate, fmt, e))
            continue
        if not match or (match.year, match.month) == (parsed.year, parsed.month):
            return parsed.date(), no_match
        # Valid date, but not in the hinted year/month.
        no_match.append(raw)
    return None, no_match

EagleFiler = app(id='com.c-command.EagleFiler')
Paper = EagleFiler.documents['Paper.eflibrary']

for record in Paper.library_records[its.kind=='PDF']():
    title = record.title()
    hint, no_match = extract_date(title)

    contents = record.contents()
    extracted, no_match = extract_date(contents, hint)

    if not extracted:
        print title, hint
        for nm in no_match:
            print '  no match', nm
        if not hint:
            continue

    record.creation_date.set(extracted or hint)