source: trunk/Update Podcasts/update_podcasts.py@ 535

Last change on this file since 535 was 492, checked in by Nicholas Riley, 16 years ago

Handle BRs and use short/long descriptions.

File size: 3.8 KB
Line 
1#!/usr/bin/python
2# -*- coding: utf-8 -*-
3
4from appscript import *
5from aem import AEEnum
6from itertools import izip
7import htmlentitydefs
8import re
9import sys
10import time
11
# based on <http://sebsauvage.net/python/snyppets/>
# Patterns for the three kinds of HTML character reference.  The numeric
# patterns use explicit digit classes so that only well-formed references
# match (the hexadecimal form accepts hexadecimal digits only).
RE_ENTITY_CHR = re.compile(u'&(%s);' % u'|'.join(htmlentitydefs.name2codepoint))
RE_ENTITY_DEC = re.compile(u'&#([0-9]+);')
RE_ENTITY_HEX = re.compile(u'&#[xX]([0-9a-fA-F]+);')
# Combined pattern used by decode_entities(); its groups are
# (hexadecimal digits, decimal digits, entity name).
_RE_ENTITY = re.compile(
    u'&(?:#[xX]([0-9a-fA-F]+)|#([0-9]+)|(%s));'
    % u'|'.join(htmlentitydefs.name2codepoint))


def decode_entities(s):
    """Replace HTML character references in *s* by the characters they name.

    Named (``&amp;``), decimal (``&#38;``) and hexadecimal (``&#x26;``)
    references are all decoded in a single pass, so text produced by one
    replacement is never decoded a second time (``&amp;#65;`` becomes
    ``&#65;``, not ``A``).  A numeric reference whose value is not a valid
    code point is replaced by a space.  Anything that is not a well-formed
    reference is left untouched.

    Returns a ``(text, count)`` tuple: the decoded text and the number of
    references that were replaced.
    """
    def entity2char(m):
        hex_digits, dec_digits, name = m.groups()
        try:
            if hex_digits is not None:
                return unichr(int(hex_digits, 16))
            if dec_digits is not None:
                return unichr(int(dec_digits))
        except (ValueError, OverflowError):
            # Value outside the range unichr() accepts: substitute a space
            # instead of aborting the whole conversion.
            return u' '
        # The pattern only matches names present in name2codepoint.
        return unichr(htmlentitydefs.name2codepoint[name])

    return _RE_ENTITY.subn(entity2char, s)
30
# Patterns used by html_to_text() to turn markup into plain text.

# Opening paragraph tag, with or without attributes.
OPEN_P = r'<[Pp][^>]*>'
RE_OPEN_P = re.compile(OPEN_P)
# Opening paragraph tag not immediately followed by another one, then the
# paragraph contents (group 1) and a closing paragraph tag.
RE_MATCHED_P = re.compile(OPEN_P + r'(?!<[Pp])(.*)</[Pp]>')
# Run of whitespace containing at least three newlines.
RE_TOO_MANY_CR = re.compile(r'\s*\n\s*\n\s*\n+', re.U)
# Any tag at all.
RE_TAG = re.compile(r'<[^>]*>')
# Line-break tag together with the whitespace that follows it.
RE_BR = re.compile(r'<[Bb][Rr][^>]*>\s*', re.U)
# Any run of whitespace.
RE_WHITESPACE = re.compile(r'\s+', re.U)
# Whitespace at the very start; group 1 is the rest of the first line.
RE_LEADING_WHITESPACE = re.compile(r'^\s+(.*)', re.U)
# Whitespace at the very end; group 1 is the text of the last line that
# precedes it.  The group is non-greedy: a greedy '.*' would swallow all
# but the final whitespace character, leaving trailing blanks behind.
RE_TRAILING_WHITESPACE = re.compile(r'(.*?)\s+$', re.U)
40
# Convert a description that may contain HTML markup into plain text.
#
# s: the text to convert.
# Returns the converted text.
#
# NOTE(review): indentation was lost in this copy of the file, so it is not
# visible whether the last three substitutions (RE_TOO_MANY_CR and the
# leading/trailing whitespace patterns) belong to the HTML branch or run for
# every input -- confirm against the repository before restructuring.
41def html_to_text(s):
# decode_entities() returns the decoded text and the number of entities
# it replaced.
42 s, replaced = decode_entities(s)
# Normalize CR LF and bare CR line endings to LF.
43 s = s.replace('\r\n', '\n')
44 s = s.replace('\r', '\n')
# Treat the text as HTML if any entity was decoded or it contains a tag.
45 if replaced > 0 or RE_TAG.search(s): # HTML
# Collapse every run of whitespace (including newlines) to one space.
46 s = RE_WHITESPACE.sub(' ', s)
# Matched paragraphs become their contents followed by a blank line.
47 s = RE_MATCHED_P.sub(r'\1\n\n', s)
# Any remaining opening paragraph tag becomes a blank line.
48 s = RE_OPEN_P.sub(r'\n\n', s)
# Line-break tags (and the whitespace after them) become a newline.
49 s = RE_BR.sub(r'\n', s)
# Drop every remaining tag.
50 s = RE_TAG.sub('', s)
# Reduce whitespace runs containing three or more newlines to one blank line.
51 s = RE_TOO_MANY_CR.sub(r'\n\n', s)
# Trim whitespace at the start and at the end of the text using the
# RE_LEADING_WHITESPACE / RE_TRAILING_WHITESPACE patterns.
52 s = RE_LEADING_WHITESPACE.sub(r'\1', s)
53 s = RE_TRAILING_WHITESPACE.sub(r'\1', s)
54 return s
55
def words(s):
    """Return the set of distinct whitespace-separated words in *s*.

    re.split() produces empty strings when *s* is empty or starts or ends
    with whitespace; those are discarded so they are never counted as words.
    """
    return set(word for word in RE_WHITESPACE.split(s) if word)
58
def _lyrics_text(desc, longdesc):
    """Return the plain text to store as lyrics, or None if there is none.

    desc and longdesc are a track's short and long description; either may
    be k.missing_value.  Every description that is used is passed through
    html_to_text().
    """
    if longdesc == k.missing_value:
        if desc == k.missing_value:
            return None
        # Only the short description exists: clean it up the same way as
        # in the case where both descriptions are present.
        return html_to_text(desc)

    longdesc = html_to_text(longdesc)
    if desc == k.missing_value:
        return longdesc

    desc = html_to_text(desc)
    # Put the short description in front of the long one unless it is
    # already contained in it or differs from it by at most one word.
    if (desc not in longdesc and
        len(words(desc) - words(longdesc)) > 1):
        return '%s\n\n%s' % (desc, longdesc)
    return longdesc


def podcasts_to_lyrics(iTunes):
    """Copy the description of every podcast track into its lyrics.

    iTunes is the appscript application object for iTunes.  Tracks whose
    lyrics are k.missing_value, tracks without any description, and tracks
    whose lyrics already equal the computed text are left alone.
    """
    # '== True' is part of the appscript filter expression and must stay.
    podcasts = iTunes.tracks[its.podcast == True]

    # Fetch each property for all podcast tracks with one call apiece.
    ids = podcasts.id()
    descs = podcasts.description()
    longdescs = podcasts.long_description()
    lyricses = podcasts.lyrics()

    for id_, desc, longdesc, lyrics in izip(ids, descs, longdescs, lyricses):
        if lyrics == k.missing_value: # video
            continue

        text = _lyrics_text(desc, longdesc)
        if text is None or lyrics == text:
            continue

        iTunes.tracks[its.id == id_].lyrics.set(text)
87
def iTunes_main_pane():
    """Return a System Events reference to the main pane of the iTunes window.

    The reference addresses scroll area 1 of splitter group 1 of the window
    named 'iTunes' in the 'iTunes' application process.
    """
    system_events = app(u'System Events')
    itunes_process = system_events.application_processes[u'iTunes']
    main_window = itunes_process.windows[u'iTunes']
    return main_window.splitter_groups[1].scroll_areas[1]
90
def wait_for_podcast_update(iTunes):
    """Block until no podcast is shown as downloading or queued.

    Reveals the playlist whose special kind is 'kSpP', then polls the
    status texts in the main pane every half second until none of them
    reads 'downloading' or 'queued for download'.
    """
    busy_statuses = (u'downloading', u'queued for download')

    # show 'Podcasts'
    iTunes.playlists[its.special_kind == AEEnum('kSpP')].reveal()
    status_texts = iTunes_main_pane().outlines[1].rows.static_texts[1].value

    while True:
        statuses = status_texts.get()
        if not any(status in busy_statuses for status in statuses):
            break
        time.sleep(0.5)
98
def wait_for_iPod_update(iTunes):
    """Block until the 'Sync' button in the main pane is enabled.

    Reveals the iPod's library playlist, then polls the button's enabled
    state every half second for as long as it compares equal to False.
    """
    # show iPod
    ipod_sources = iTunes.sources[its.kind == AEEnum('kPod')]
    ipod_sources.sources[1].library_playlists[1].reveal()

    sync_enabled = iTunes_main_pane().buttons[u'Sync'].enabled
    enabled = sync_enabled.get()
    while enabled == False:
        time.sleep(0.5)
        enabled = sync_enabled.get()
105
if __name__ == '__main__':
    # Script entry point: sync the iPod, refresh all podcasts, copy the
    # podcast descriptions into the lyrics, then sync once more.
    def _progress(message):
        # Progress messages go to stderr, one per line.
        sys.stderr.write(message + '\n')

    iTunes = app('iTunes')

    _progress('Synchronizing iPod...')
    iTunes.update()
    wait_for_iPod_update(iTunes)

    _progress('Updating podcasts...')
    iTunes.updateAllPodcasts()
    wait_for_podcast_update(iTunes)

    _progress('Copying podcast descriptions to lyrics...')
    podcasts_to_lyrics(iTunes)
    iTunes.update()
Note: See TracBrowser for help on using the repository browser.