source: trunk/Update Podcasts/update_podcasts.py @ 492

Last change on this file since 492 was 492, checked in by Nicholas Riley, 11 years ago

Handle BRs and use short/long descriptions.

File size: 3.8 KB
Line 
1#!/usr/bin/python
2# -*- coding: utf-8 -*-
3
4from appscript import *
5from aem import AEEnum
6from itertools import izip
7import htmlentitydefs
8import re
9import sys
10import time
11
# based on <http://sebsauvage.net/python/snyppets/>
_ENTITY_NAMES = u'|'.join(htmlentitydefs.name2codepoint)
RE_ENTITY_CHR = re.compile(u'&(%s);' % _ENTITY_NAMES)
RE_ENTITY_DEC = re.compile(u'&#(\\d+);')
# Only hexadecimal digits are accepted, so int(..., 16) cannot fail on a match.
RE_ENTITY_HEX = re.compile(u'&#[xX]([0-9a-fA-F]+);')
# All three reference forms in one alternation: group 1 is a known entity
# name, group 2 a decimal code point, group 3 a hexadecimal code point.
RE_ENTITY = re.compile(
    u'&(?:(%s)|#(\\d+)|#[xX]([0-9a-fA-F]+));' % _ENTITY_NAMES)


def decode_entities(s):
    """Decode HTML character references in ``s``.

    Named references listed in ``htmlentitydefs.name2codepoint`` and
    numeric references (``&#65;``, ``&#x41;``) are replaced by the
    character they denote.  Unknown names and malformed numeric
    references do not match and are left untouched.

    The string is scanned exactly once, so text produced by decoding is
    never decoded a second time (``&amp;#65;`` yields ``&#65;``, not ``A``).

    Returns a ``(decoded_string, replaced)`` tuple where ``replaced`` is
    the number of references that were matched.
    """
    def entity2char(m):
        name, dec, hex_ = m.groups()
        try:
            if name is not None:
                return unichr(htmlentitydefs.name2codepoint[name])
            if dec is not None:
                return unichr(int(dec))
            return unichr(int(hex_, 16))
        except (ValueError, OverflowError):
            # The code point is outside what unichr() accepts: substitute
            # a space rather than aborting the whole conversion.
            return u' '

    return RE_ENTITY.subn(entity2char, s)
30
# Opening <p> tag, with or without attributes.  The negative lookahead stops
# tags whose name merely starts with "p" (<pre>, <param>, ...) from matching.
OPEN_P = r'<[Pp](?![A-Za-z0-9])[^>]*>'
RE_OPEN_P = re.compile(OPEN_P)
# An opening <p> not immediately followed by another <p>, up to a closing
# </p>; group 1 is the text in between (greedy, '.' does not cross newlines).
RE_MATCHED_P = re.compile(OPEN_P + r'(?!<[Pp])(.*)</[Pp]>')
# Three or more line breaks, allowing other whitespace between them.
RE_TOO_MANY_CR = re.compile(r'\s*\n\s*\n\s*\n+', re.U)
# Any tag-like span from '<' to the next '>'.
RE_TAG = re.compile(r'<[^>]*>')
# A <br> tag (any case, optional attributes or slash) plus whitespace after it.
RE_BR = re.compile(r'<[Bb][Rr][^>]*>\s*', re.U)
RE_WHITESPACE = re.compile(r'\s+', re.U)
# Both patterns below are meant to be used as ``.sub(r'\1', text)``:
# group 1 is what remains once the outer whitespace is dropped.
RE_LEADING_WHITESPACE = re.compile(r'^\s+(.*)', re.U)
# Anchored at the start, lazy and DOTALL so that group 1 is everything before
# the final whitespace run, even when that run contains line breaks.
RE_TRAILING_WHITESPACE = re.compile(r'^(.*?)\s+$', re.U | re.S)
40
def html_to_text(s):
    """Convert a possibly-HTML description into plain text.

    Character references are decoded first and CR / CRLF line endings are
    normalized to LF.  The text is treated as HTML when at least one
    reference was decoded or a tag-like ``<...>`` span is present; in that
    case whitespace runs are collapsed, paragraphs and <br> tags become
    line breaks, remaining tags are removed and long runs of line breaks
    are reduced to a single blank line.  Finally the leading and trailing
    whitespace patterns are applied.
    """
    text, entity_count = decode_entities(s)
    text = text.replace('\r\n', '\n').replace('\r', '\n')

    looks_like_html = entity_count > 0 or RE_TAG.search(text) is not None
    if looks_like_html:
        # Order matters: whitespace is flattened before tags are turned
        # into line breaks, and generic tag removal comes after the
        # <p> / <br> handling.
        html_rewrites = (
            (RE_WHITESPACE, ' '),
            (RE_MATCHED_P, r'\1\n\n'),
            (RE_OPEN_P, r'\n\n'),
            (RE_BR, r'\n'),
            (RE_TAG, ''),
            (RE_TOO_MANY_CR, r'\n\n'),
        )
        for pattern, replacement in html_rewrites:
            text = pattern.sub(replacement, text)

    for pattern in (RE_LEADING_WHITESPACE, RE_TRAILING_WHITESPACE):
        text = pattern.sub(r'\1', text)
    return text
55
def words(s):
    """Return the set of distinct pieces of ``s`` between whitespace runs.

    Because the string is split on whitespace, leading or trailing
    whitespace (or an empty ``s``) contributes an empty string to the set.
    """
    pieces = RE_WHITESPACE.split(s)
    return set(pieces)
58
def podcasts_to_lyrics(iTunes):
    """Copy podcast descriptions into the lyrics field of each podcast track.

    For every track selected by ``its.podcast == True``:

    * tracks whose lyrics are ``k.missing_value`` are skipped (marked
      "video" by the original author);
    * the long description, converted with html_to_text(), is preferred;
      the converted short description is put in front of it when it is not
      already contained in the long text and adds more than one new word;
    * when there is no long description the converted short description is
      used on its own, and tracks with neither are skipped;
    * lyrics are only written when they differ from the computed text.

    ``iTunes`` is the appscript application object for iTunes.
    """
    # ``== True`` is required here: the comparison builds the filter passed
    # to iTunes; it is not an ordinary Python truth test.
    podcasts = iTunes.tracks[its.podcast == True]

    # Each call yields one value per selected track; the loop below relies
    # on all four sequences being in the same track order.
    ids = podcasts.id()
    descs = podcasts.description()
    longdescs = podcasts.long_description()
    lyricses = podcasts.lyrics()

    for id_, desc, longdesc, lyrics in izip(ids, descs, longdescs, lyricses):
        if lyrics == k.missing_value: # video
            continue

        # Convert the short description in every case, so that the
        # fallback below gets the same clean-up as the long description.
        if desc != k.missing_value:
            desc = html_to_text(desc)

        if longdesc == k.missing_value:
            if desc == k.missing_value:
                continue
            longdesc = desc
        else:
            longdesc = html_to_text(longdesc)
            if (desc != k.missing_value and
                desc not in longdesc and
                len(words(desc) - words(longdesc)) > 1):
                longdesc = '%s\n\n%s' % (desc, longdesc)

        if lyrics == longdesc:
            continue

        iTunes.tracks[its.id == id_].lyrics.set(longdesc)
87
def iTunes_main_pane():
    """Return a System Events reference into the iTunes window.

    The reference addresses the first scroll area of the first splitter
    group of the window named 'iTunes' in the 'iTunes' application process.
    """
    system_events = app(u'System Events')
    itunes_process = system_events.application_processes[u'iTunes']
    main_window = itunes_process.windows[u'iTunes']
    first_splitter = main_window.splitter_groups[1]
    return first_splitter.scroll_areas[1]
90
def wait_for_podcast_update(iTunes):
    """Block until no podcast row reports a pending or running download.

    Reveals the playlist whose special kind is ``kSpP`` and then polls,
    every half second, the first static text of each row of the main
    pane's first outline until none of the values is 'downloading' or
    'queued for download'.
    """
    # show 'Podcasts'
    iTunes.playlists[its.special_kind == AEEnum('kSpP')].reveal()

    busy_states = (u'downloading', u'queued for download')
    row_status = iTunes_main_pane().outlines[1].rows.static_texts[1].value
    while True:
        still_busy = any(state in busy_states for state in row_status.get())
        if not still_busy:
            break
        time.sleep(0.5)
98
def wait_for_iPod_update(iTunes):
    """Block until the 'Sync' button in the iTunes main pane is enabled.

    Reveals the first library playlist of the first source nested in the
    source whose kind is ``kPod`` and then polls the button's ``enabled``
    property every half second for as long as it equals False.
    """
    # show iPod
    ipod = iTunes.sources[its.kind == AEEnum('kPod')]
    ipod.sources[1].library_playlists[1].reveal()

    sync_button = iTunes_main_pane().buttons[u'Sync']
    while True:
        # Compared against False explicitly: any other value ends the wait.
        if not (sync_button.enabled.get() == False):
            return
        time.sleep(0.5)
105
def _announce(message):
    """Write a one-line progress message to standard error."""
    sys.stderr.write(message + '\n')


def main():
    """Sync the iPod, refresh podcasts, copy descriptions, then sync again."""
    iTunes = app('iTunes')

    _announce('Synchronizing iPod...')
    iTunes.update()
    wait_for_iPod_update(iTunes)

    _announce('Updating podcasts...')
    iTunes.updateAllPodcasts()
    wait_for_podcast_update(iTunes)

    _announce('Copying podcast descriptions to lyrics...')
    podcasts_to_lyrics(iTunes)

    # Second sync request, issued after the lyrics have been rewritten.
    iTunes.update()


if __name__ == '__main__':
    main()
Note: See TracBrowser for help on using the repository browser.