Context Navigation

source: trunk/Update Dates/update_dates.py@ 593

Last change on this file since 593 was 593, checked in by Nicholas Riley, 14 years ago
update_dates.py: Use minutes rather than months in timestamp - the cause of bizarre filename conflicts.
File size: 6.1 KB

Line
1	from appscript import *
2	from datetime import datetime
3	from osax import *
4	from plistlib import readPlist, writePlist
5	import os
6	import re
7	import time
8
# Property list from which the 'Sources' list is read at start-up and to
# which it is written back when the script finishes.
PREFERENCES_PATH = os.path.expanduser(
    '~/Library/Preferences/net.sabi.UpdateDates.plist')

# (strptime format, regular expression) pairs, tried in this order.  The
# trailing comment on each entry names the document source the pattern was
# written for.  Each regex must not contain capturing groups of its own:
# RE_DATE wraps every entry in exactly one group and callers map the group
# number back to an index into this tuple.
DATE_FORMATS = (
    ('%m/%d/%y', r'\d{1,2}/\d{1,2}/\d{1,2}'),           # T-Mobile
    ('%m.%d.%y', r'\d{1,2}\.\d{1,2}\.\d{1,2}'),         # iFixit
    ('%b %d, %Y', r'[A-Z][a-z][a-z] \d{1,2}, \d{4}'),   # AmerenIP
    ('%B %d, %Y', r'[A-Z][a-z]+ ?\d{1,2}, ?\d{4}'),     # Amazon
    ('of %Y%m%d', r'of \d{8}'),                         # Amazon
    ('%m/%d/%Y', r'\d{1,2}/\d{1,2}/\d{4}'),             # Busey
    ('%b %d %Y', r'[A-Z]{3} \d{1,2} \d{4}'),            # State Farm
    ('%d %b %Y', r'\d{1,2} [A-Z][A-Za-z]{2} \d{4}'),    # Apple
    ('%Y-%m-%d', r'\d{4}-\d{2}-\d{2}'),                 # MacSpeech
    ('%Y-%m', r'\d{4}-\d{2}'),                          # filename
    ('%m1%d/%y', r'\d{1,2}1\d{1,2}/\d{1,2}'),           # T-Mo bad OCR
    ('%m/%d1%y', r'\d{1,2}/\d{1,2}1\d{1,2}'),           # T-Mo bad OCR
    ('%m/%d/%y', r'\d{1,2}/ \d{1,2}/ \d{1,2}'),         # T-Mo bad OCR
    ('%m/%d/%Y',
     r'(?:\d ?){1,2}/ (?:\d ?){1,2}/ (?:\d ?){4}'),     # Busey bad OCR
)

# One alternative per DATE_FORMATS entry, each in its own capturing group and
# delimited by word boundaries.  The alternatives are joined with a plain
# '|': an escaped '\|' would be a literal pipe character and the pattern
# would then never match an individual date.
RE_DATE = re.compile('|'.join(r'(\b%s\b)' % regex
                              for _strptime_format, regex in DATE_FORMATS))
31
def extract_date(contents, match=None):
    """Return the first acceptable date found in *contents*.

    Every RE_DATE match is parsed with the strptime format of the
    DATE_FORMATS entry whose group matched.  When *match* is given, a parsed
    date is only accepted if its year and month equal those of *match*.

    Returns a ``(date, rejected)`` pair.  ``date`` is a ``datetime.date``, or
    None when no candidate was accepted.  ``rejected`` lists the candidates
    seen before the result: a ``(text, format, error)`` tuple for text that
    failed to parse, or the raw matched text for a date that parsed but was
    in the wrong year/month.
    """
    rejected = []
    for hit in RE_DATE.finditer(contents):
        # Group N of RE_DATE corresponds to DATE_FORMATS[N - 1].
        group_index = hit.lastindex
        raw_text = hit.group(group_index)
        date_format = DATE_FORMATS[group_index - 1][0]
        # note: spaces in strptime format match zero or more spaces, this is OK
        compact = raw_text.replace(' ', '')
        try:
            parsed = datetime.strptime(compact, date_format)
        except ValueError as error:  # not a date
            rejected.append((compact, date_format, error))
            continue
        if match and (parsed.year, parsed.month) != (match.year, match.month):
            rejected.append(raw_text)
            continue
        return parsed.date(), rejected
    return None, rejected
48
def extract_source(title, hint):
    """Return the source-name part of a record *title*.

    If *hint* is false the title is returned unchanged.  Otherwise the title
    is cut at the start of its first RE_DATE match and trailing whitespace is
    stripped from what remains.
    """
    if not hint:
        return title
    date_start = RE_DATE.search(title).start(0)
    prefix = title[:date_start]
    return prefix.rstrip()
54
# Application reference for EagleFiler and, within it, the document named
# 'Paper.eflibrary'.
EagleFiler = app(id='com.c-command.EagleFiler')
Paper = EagleFiler.documents['Paper.eflibrary']

# If EagleFiler does not report that document as existing, ask it to open
# the library from the user's Documents folder.
if not Paper.exists():
    EagleFiler.open(
        os.path.expanduser('~/Documents/Paper/Paper.eflibrary'))
60
def read_sources():
    """Return the 'Sources' value stored in the plist at PREFERENCES_PATH.

    An empty list is returned when the plist has no 'Sources' key.  Errors
    raised by ``readPlist`` propagate to the caller.
    """
    preferences = readPlist(PREFERENCES_PATH)
    return preferences.get('Sources', [])
63
# Load the list of known sources, starting empty when no preferences file
# exists yet.
if os.path.exists(PREFERENCES_PATH):
    try:
        sources = read_sources()
    except Exception:
        # The file exists but could not be read as it stands (presumably it
        # is stored in a non-XML plist format -- confirm).  Convert it in
        # place to XML with plutil and retry once; a second failure
        # propagates.  Only Exception is caught, so Ctrl-C and SystemExit
        # are not swallowed.
        from subprocess import call
        call(['plutil', '-convert', 'xml1', PREFERENCES_PATH])
        sources = read_sources()
else:
    sources = []
73
def update_all():
    """Re-date every PDF record in the Paper library and refresh ``sources``.

    For each PDF record:

    * the title is parsed for a date (the "hint") and a source name, the
      part of the title that precedes the date;
    * if the source name occurs in the record's text (case-insensitively) it
      is moved to the end of ``sources``;
    * the record's text is searched for a date in the same year and month as
      the hint (any date when there is no hint) and the record's creation
      date is set to it, falling back to the hint.

    Records with neither a date in the text nor a hint are reported on
    stdout and left untouched.  When the loop finishes ``sources`` is
    reversed in place.
    """
    for record in Paper.library_records[its.kind == 'PDF']():
        title = record.title()
        hint, _ = extract_date(title)
        source = extract_source(title, hint)

        contents = record.contents()
        # A title that starts with its date yields an empty source.  The
        # empty pattern matches any text, and an empty entry in ``sources``
        # would add an always-matching alternative to the pattern scan_one
        # builds from the list, so it is never recorded.
        if source and re.search(re.escape(source), contents, re.IGNORECASE):
            if source in sources:
                sources.remove(source)
            sources.append(source)

        extracted, no_match = extract_date(contents, hint)

        if not extracted:
            # Single-argument print() emits the same text under Python 2
            # and Python 3.
            print('%s %s' % (title, hint))
            for nm in no_match:
                print(' no match %s' % (nm,))
            if not hint:
                continue

        record.creation_date.set(extracted or hint)

    sources.reverse()  # most recently referenced ones at top
98
def _match_known_source(contents, known_sources):
    """Return the entry of *known_sources* that occurs first in *contents*.

    Matching is case-insensitive and the stored spelling of the entry is
    returned.  Returns None when there are no usable entries or none of
    them occurs in *contents*.
    """
    # Empty names are dropped: an empty alternative matches any text, and
    # with no names at all the pattern '()' would match the empty string.
    candidates = [name for name in known_sources if name]
    if not candidates:
        return None
    # Alternatives are joined with a plain '|'; an escaped '\|' would be a
    # literal pipe character rather than alternation.
    pattern = '(%s)' % '|'.join(re.escape(name) for name in candidates)
    found = re.search(pattern, contents, re.IGNORECASE)
    if found is None:
        return None
    # use the saved source's case
    wanted = found.group(1).lower()
    for name in candidates:
        # name.lower() works for byte strings and unicode strings alike.
        if name.lower() == wanted:
            return name
    # Case folding disagreed with the regex engine; keep the text as found.
    return found.group(1)


def scan_one():
    """Scan one document with Acrobat, import it into Paper and title it.

    Asks how many pages to scan, drives Acrobat's "Scan to PDF..." user
    interface through System Events, imports the saved PDF into the Paper
    library, and sets the record's title to the first known source found in
    its text ('???' if none), followed by ' YYYY-MM' of the first date found
    in the text.  The record's creation date is set to that date as well.

    Returns False when the page-count dialog raises CommandError (the
    dialog's cancel button is 'Cancel'), True after a document has been
    imported.
    """
    Acrobat = app(id='com.adobe.Acrobat.Pro')
    SystemEvents = app(id='com.apple.systemevents')
    acro_process = SystemEvents.application_processes[u'Acrobat']

    # Name of the file the scan is saved under, stamped to the second.
    filename = datetime.now().strftime('Scanned Document %y%m%d %H%M%S')

    SA = ScriptingAddition()
    SA.activate()
    try:
        # Ask again until the answer is a positive integer.
        while True:
            result = SA.display_dialog('How many pages do you wish to scan?',
                                       buttons=['Cancel', 'Scan'],
                                       cancel_button=1, default_button=2,
                                       default_answer='1')
            try:
                pages = int(result[k.text_returned])
            except ValueError:
                continue
            if pages > 0:
                break
    except CommandError:
        return False

    Acrobat.activate()

    acro_process.menu_bars[1].menu_bar_items['Document'].menus[1].\
        menu_items['Scan to PDF...'].click()
    acro_process.windows['Acrobat Scan'].buttons['Scan'].click()

    # pause (Carbon -> Cocoa? use keystrokes instead?)
    acro_process.windows['Save Scanned File As'].text_fields[1].value.\
        set(filename)
    acro_process.windows['Save Scanned File As'].buttons['Save'].click()

    acro_scan_window = acro_process.windows['Acrobat Scan']

    while True:
        acro_process.windows['DSmobile 600'].buttons['Scan'].click()
        # Poll until the 'Acrobat Scan' window exists again.
        while not acro_scan_window.exists():
            time.sleep(0.1)

        pages -= 1

        if pages == 0:
            # Last page: second radio button (presumably "scanning is
            # complete" -- confirm against the dialog).
            acro_scan_window.groups[1].radio_buttons[2].click()
            acro_scan_window.buttons['OK'].click()
            break

        # More pages to come: first radio button (presumably "scan more
        # pages" -- confirm against the dialog).
        acro_scan_window.groups[1].radio_buttons[1].click()
        acro_scan_window.buttons['OK'].click()

    scanned_document = Acrobat.documents['%s.pdf' % filename]
    scanned_file = scanned_document.file_alias()
    scanned_document.close()

    record = Paper.import_(files=[scanned_file])[0]
    contents = record.contents()

    title = _match_known_source(contents, sources)
    if title is None:
        title = '???'

    extracted, no_match = extract_date(contents)
    if extracted:
        title += extracted.strftime(' %Y-%m')
        record.creation_date.set(extracted)

    record.title.set(title)

    return True
173
# update_all()

# XXX incremental source recording from EagleFiler (use tag to record)

# Scan documents one after another until scan_one() returns a false value,
# then write the list of known sources back to the preferences plist.
while True:
    if not scan_one():
        break

writePlist({'Sources': sources}, PREFERENCES_PATH)

Note: See TracBrowser for help on using the repository browser.

Download in other formats: