[564] | 1 | from appscript import *
|
---|
[566] | 2 | from datetime import datetime
|
---|
| 3 | from osax import *
|
---|
| 4 | from plistlib import readPlist, writePlist
|
---|
| 5 | import os
|
---|
[564] | 6 | import re
|
---|
[566] | 7 | import time
|
---|
[564] | 8 |
|
---|
# Property list in which the list of known document sources is persisted.
PREFERENCES_PATH = os.path.expanduser(
    '~/Library/Preferences/net.sabi.UpdateDates.plist')

# (strptime format, regular expression) pairs.  The position of a pair in this
# tuple is significant: RE_DATE wraps each expression in exactly one capturing
# group, so group number N corresponds to DATE_FORMATS[N - 1].  Expressions
# must therefore not contain capturing groups of their own.
DATE_FORMATS = (
    ('%m/%d/%y', r'\d{1,2}/\d{1,2}/\d{1,2}'),           # T-Mobile
    ('%m.%d.%y', r'\d{1,2}\.\d{1,2}\.\d{1,2}'),         # iFixit
    ('%b %d, %Y', r'[A-Z][a-z][a-z] \d{1,2}, \d{4}'),   # AmerenIP
    ('%B %d, %Y', r'[A-Z][a-z]+ ?\d{1,2}, ?\d{4}'),     # Amazon
    ('of %Y%m%d', r'of \d{8}'),                         # Amazon
    ('%m/%d/%Y', r'\d{1,2}/\d{1,2}/\d{4}'),             # Busey
    ('%b %d %Y', r'[A-Z]{3} \d{1,2} \d{4}'),            # State Farm
    ('%d %b %Y', r'\d{1,2} [A-Z][A-Za-z]{2} \d{4}'),    # Apple
    ('%Y-%m-%d', r'\d{4}-\d{2}-\d{2}'),                 # MacSpeech
    ('%Y-%m', r'\d{4}-\d{2}'),                          # filename
    ('%m1%d/%y', r'\d{1,2}1\d{1,2}/\d{1,2}'),           # T-Mo bad OCR
    ('%m/%d1%y', r'\d{1,2}/\d{1,2}1\d{1,2}'),           # T-Mo bad OCR
    ('%m/%d/%y', r'\d{1,2}/ \d{1,2}/ \d{1,2}'),         # T-Mo bad OCR
    ('%m/%d/%Y',
     r'(?:\d ?){1,2}/ (?:\d ?){1,2}/ (?:\d ?){4}'),     # Busey bad OCR
)

# One alternation of all expressions above, each bounded by word boundaries
# and captured in its own group so the matching format can be identified.
RE_DATE = re.compile(
    '|'.join(r'(\b%s\b)' % entry[1] for entry in DATE_FORMATS))
| 31 |
|
---|
def extract_date(contents, match=None):
    """Return the first parseable date found in ``contents``.

    Each RE_DATE match is parsed with the strptime format paired with the
    alternative that matched (see DATE_FORMATS).

    contents -- text to search.
    match    -- optional object with ``year`` and ``month`` attributes; when
                given (and truthy), only a date in that same year and month
                is accepted.

    Returns a ``(date, no_match)`` tuple.  ``date`` is a ``datetime.date`` or
    None when nothing acceptable was found.  ``no_match`` lists the candidates
    rejected before returning: a ``(text, format, error)`` tuple for text that
    failed to parse, or the raw matched string for a valid date that fell
    outside the requested month.
    """
    no_match = []
    for m in RE_DATE.finditer(contents):
        matched_format = m.lastindex
        fmt = DATE_FORMATS[matched_format - 1][0]
        # The OCR-tolerant patterns allow stray spaces, so spaces are removed
        # from the matched text.  They are removed from the format as well:
        # strptime implementations that require at least one whitespace
        # character for whitespace in the format would otherwise reject every
        # format that contains a space.
        matched = m.group(matched_format).replace(' ', '')
        try:
            parsed = datetime.strptime(matched, fmt.replace(' ', ''))
        except ValueError as e:  # not a date
            no_match.append((matched, fmt, e))
            continue
        if not match or (match.year, match.month) == (parsed.year, parsed.month):
            return parsed.date(), no_match
        no_match.append(m.group(matched_format))
    return None, no_match
| 48 |
|
---|
def extract_source(title, hint):
    """Return the part of ``title`` that precedes its date.

    title -- record title.
    hint  -- the date previously extracted from ``title``; when it is falsy
             the title is returned unchanged.

    With a truthy ``hint`` the title is cut at the start of the first RE_DATE
    match and trailing whitespace is removed.
    """
    if not hint:
        return title
    date_start = RE_DATE.search(title).start(0)
    return title[:date_start].rstrip()
| 54 |
|
---|
# Application and library references used by every function below.
EagleFiler = app(id='com.c-command.EagleFiler')
Paper = EagleFiler.documents['Paper.eflibrary']

# Ask EagleFiler to open the library when it is not among its documents yet.
if not Paper.exists():
    EagleFiler.open(
        os.path.expanduser('~/Documents/Paper/Paper.eflibrary'))
[564] | 60 |
|
---|
def read_sources():
    """Return the 'Sources' list stored in the preferences property list.

    An empty list is returned when the property list has no 'Sources' key.
    Any error raised by readPlist propagates to the caller.
    """
    preferences = readPlist(PREFERENCES_PATH)
    return preferences.get('Sources', [])
| 63 |
|
---|
# Load the list of known sources saved by a previous run, if any.
if os.path.exists(PREFERENCES_PATH):
    try:
        sources = read_sources()
    except Exception:
        # Reading failed -- presumably the file is stored in a property list
        # format readPlist cannot parse.  Convert it to XML in place with
        # plutil and read it again; a second failure propagates.
        # (Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed.)
        from subprocess import call
        call(['plutil', '-convert', 'xml1', PREFERENCES_PATH])
        sources = read_sources()
else:
    sources = []
| 73 |
|
---|
def update_all():
    """Re-date every PDF record in the Paper library and refresh ``sources``.

    For each record:

    * a date hint is extracted from the title, and the text preceding it is
      taken as the record's source;
    * if the source occurs in the record contents (ignoring case) it is moved
      to the end of the module-level ``sources`` list;
    * a date is extracted from the contents, restricted to the hint's year
      and month when a hint exists;
    * the record's creation date is set to the extracted date, falling back
      to the hint.  Records with neither are reported and left untouched.

    After all records are processed ``sources`` is reversed in place.
    """
    for record in Paper.library_records[its.kind=='PDF']():
        title = record.title()
        hint, _ = extract_date(title)
        source = extract_source(title, hint)

        contents = record.contents()
        # An empty source (a title consisting only of a date) would match any
        # contents, so it must never be recorded as a known source.
        if source and re.search(re.escape(source), contents, re.IGNORECASE):
            if source in sources:
                sources.remove(source)
            sources.append(source)

        extracted, no_match = extract_date(contents, hint)

        if not extracted:
            # Report what was tried so the patterns can be improved.
            print('%s %s' % (title, hint))
            for nm in no_match:
                print(' no match %s' % (nm,))
            if not hint:
                continue  # nothing to set the date from

        record.creation_date.set(extracted or hint)

    sources.reverse() # most recently referenced ones at top
| 98 |
|
---|
def _find_source(contents, known_sources):
    """Return the entry of ``known_sources`` found first in ``contents``.

    Matching ignores case; the value returned is the entry exactly as stored
    in ``known_sources``.  Returns None when nothing matches.
    """
    # An empty alternation, or an empty-string entry, matches at offset 0 of
    # any text, so empty entries are skipped and an empty list short-circuits.
    candidates = [entry for entry in known_sources if entry]
    if not candidates:
        return None
    found = re.search('(%s)' % '|'.join(map(re.escape, candidates)), contents,
                      re.IGNORECASE)
    if not found:
        return None
    # use the saved source's case
    needle = found.group(1).lower()
    for candidate in candidates:
        if candidate.lower() == needle:
            return candidate
    return None


def scan_one():
    """Scan one document with Acrobat and file it in the Paper library.

    Asks how many pages to scan, drives Acrobat's scanning windows through
    System Events UI scripting, imports the resulting PDF into the Paper
    library, and sets the record's title (known source plus ' YYYY-MM', or
    '???' when no known source occurs in the text) and creation date from
    the scanned contents.

    Returns False when the page-count dialog yields None, True after a
    document has been scanned and imported.
    """
    Acrobat = app(id='com.adobe.Acrobat.Pro')
    SystemEvents = app(id='com.apple.systemevents')
    acro_process = SystemEvents.application_processes[u'Acrobat']

    filename = datetime.now().strftime('Scanned Document %y%m%d %H%M%S')

    SA = ScriptingAddition()
    SA.activate()
    # Keep asking until a positive integer is entered.
    while True:
        result = SA.display_dialog('How many pages do you wish to scan?',
                                   buttons=['Cancel', 'Scan'],
                                   cancel_button=1, default_button=2,
                                   default_answer='1')
        if result is None:
            return False
        try:
            pages = int(result[k.text_returned])
        except ValueError:
            continue
        if pages > 0:
            break

    Acrobat.activate()

    acro_process.menu_bars[1].menu_bar_items['Document'].menus[1].\
        menu_items['Scan to PDF...'].click()
    acro_process.windows['Acrobat Scan'].buttons['Scan'].click()

    # pause (Carbon -> Cocoa? use keystrokes instead?)
    acro_process.windows['Save Scanned File As'].text_fields[1].value.\
        set(filename)
    acro_process.windows['Save Scanned File As'].buttons['Save'].click()

    acro_scan_window = acro_process.windows['Acrobat Scan']

    while True:
        acro_process.windows['DSmobile 600'].buttons['Scan'].click()
        # Wait for the 'Acrobat Scan' window to come back after the page.
        while not acro_scan_window.exists():
            time.sleep(0.1)

        pages -= 1

        if pages == 0:
            # Last page: second radio button (presumably "scan complete").
            acro_scan_window.groups[1].radio_buttons[2].click()
            acro_scan_window.buttons['OK'].click()
            break

        # More pages to go: first radio button (presumably "scan more").
        acro_scan_window.groups[1].radio_buttons[1].click()
        acro_scan_window.buttons['OK'].click()

    scanned_document = Acrobat.documents['%s.pdf' % filename]
    scanned_file = scanned_document.file_alias(timeout=0)
    scanned_document.close()

    record = Paper.import_(files=[scanned_file], deleting_afterwards=True)[0]
    contents = record.contents()
    title = _find_source(contents, sources) or '???'

    extracted, _ = extract_date(contents)
    if extracted:
        title += extracted.strftime(' %Y-%m')
        record.creation_date.set(extracted)

    record.title.set(title)

    return True
| 172 |
|
---|
# update_all()

# XXX incremental source recording from EagleFiler (use tag to record)

# Scan documents until scan_one() returns a false value; the source list is
# written back to the preferences property list after every scan.
while True:
    if not scan_one():
        break
    writePlist({'Sources': sources}, PREFERENCES_PATH)