#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Copy podcast descriptions into the lyrics field of iTunes tracks.

When run as a script this synchronizes the iPod, updates all podcasts,
copies each podcast's (long) description into its lyrics, and synchronizes
the iPod again.  iTunes is driven through appscript and System Events GUI
scripting.

The text helpers (``decode_entities``, ``html_to_text``, ``words``) are pure
functions and can be imported without appscript being installed.
"""

import re
import sys
import time

try:  # Python 2
    from htmlentitydefs import name2codepoint
    from itertools import izip
except ImportError:  # Python 3
    from html.entities import name2codepoint
    izip = zip

try:
    unichr
except NameError:  # Python 3
    unichr = chr

try:
    from appscript import CommandError, app, its, k
    from aem import AEEnum
except ImportError as _exc:
    # Defer the failure to the first function that actually needs appscript
    # so the pure text helpers stay importable (and testable) without it.
    _APPSCRIPT_IMPORT_ERROR = _exc
else:
    _APPSCRIPT_IMPORT_ERROR = None

# Seconds to sleep between two polls of the iTunes user interface.
POLL_INTERVAL = 0.5


def _require_appscript():
    """Re-raise the ImportError recorded when appscript could not be loaded."""
    if _APPSCRIPT_IMPORT_ERROR is not None:
        raise _APPSCRIPT_IMPORT_ERROR


def _log(message):
    """Write a progress message, followed by a newline, to stderr."""
    sys.stderr.write(message + '\n')


# Entity decoding was "based on" an external recipe; the reference itself was
# already missing from the original comment.
#
# One pattern for hexadecimal, decimal and named references so that the text
# is decoded in a single pass.  Groups: 1 = hex digits, 2 = decimal digits,
# 3 = a known entity name.
RE_ENTITY = re.compile(
    r'&(?:#[xX]([0-9a-fA-F]+)|#([0-9]+)|(%s));'
    % '|'.join(sorted(name2codepoint)))


def decode_entities(s):
    """Decode HTML character references in *s*.

    Named (``&amp;``), decimal (``&#38;``) and hexadecimal (``&#x26;``)
    references are decoded in one pass, so text produced by one reference is
    never decoded a second time (``&amp;#65;`` yields ``&#65;``, not ``A``).
    Unknown names and numeric references that do not map to a valid code
    point are left untouched.

    Returns a ``(text, replaced)`` tuple where *replaced* is the number of
    references that were decoded.
    """
    replaced = [0]  # list cell: Python 2 has no ``nonlocal``

    def entity2char(m):
        hex_digits, dec_digits, name = m.groups()
        try:
            if name is not None:
                codepoint = name2codepoint[name]
            elif hex_digits is not None:
                codepoint = int(hex_digits, 16)
            else:
                codepoint = int(dec_digits)
            char = unichr(codepoint)
        except (ValueError, OverflowError):
            # Not a representable code point: keep the reference as written.
            return m.group(0)
        replaced[0] += 1
        return char

    return RE_ENTITY.sub(entity2char, s), replaced[0]


OPEN_P = r'<[Pp][^>]*>'
# Surrounding whitespace is swallowed so a paragraph break leaves no stray
# spaces at the end of the previous line or the start of the next one.
RE_OPEN_P = re.compile(r'\s*' + OPEN_P + r'\s*', re.U)
RE_TOO_MANY_CR = re.compile(r'\s*\n\s*\n\s*\n+', re.U)
RE_TAG = re.compile(r'<[^>]*>')
RE_BR = re.compile(r'<[Bb][Rr][^>]*>\s*', re.U)
RE_WHITESPACE = re.compile(r'\s+', re.U)


def html_to_text(s):
    """Convert an HTML (or plain text) description to plain text.

    Character references are decoded first.  If any were decoded, or if the
    text contains something that looks like a tag, the text is treated as
    HTML: whitespace runs collapse to a single space, ``<br>`` becomes a
    newline, every opening ``<p...>`` tag becomes a blank line and all
    remaining tags are removed.  In every case runs of three or more newlines
    collapse to one blank line and leading/trailing whitespace is stripped.
    """
    s, replaced = decode_entities(s)
    s = s.replace('\r\n', '\n').replace('\r', '\n')
    if replaced > 0 or RE_TAG.search(s):
        # HTML
        s = RE_WHITESPACE.sub(' ', s)
        # <br> is handled before <p> so that its trailing ``\s*`` cannot
        # swallow the blank line produced by a directly following paragraph.
        s = RE_BR.sub('\n', s)
        s = RE_OPEN_P.sub('\n\n', s)
        s = RE_TAG.sub('', s)
    s = RE_TOO_MANY_CR.sub('\n\n', s)
    return s.strip()


def words(s):
    """Return the set of whitespace-separated words in *s*.

    Empty strings produced by leading or trailing whitespace are not words
    and are left out of the result.
    """
    return set(word for word in RE_WHITESPACE.split(s) if word)


def _combine_descriptions(desc, longdesc):
    """Return the lyrics text for one podcast, or None if it has no text.

    *desc* and *longdesc* are the raw short and long descriptions, with None
    standing for a missing value.  Both are converted with ``html_to_text``.
    The short description is put in front of the long one only when it is not
    already contained in it and contributes more than one word of its own.
    """
    if desc is not None:
        desc = html_to_text(desc)
    if longdesc is None:
        return desc
    longdesc = html_to_text(longdesc)
    if desc is None:
        return longdesc
    if desc not in longdesc and len(words(desc) - words(longdesc)) > 1:
        return '%s\n\n%s' % (desc, longdesc)
    return longdesc


def podcasts_to_lyrics(iTunes):
    """Copy the description of every podcast track into its lyrics.

    *iTunes* is the appscript application object for iTunes.  Tracks whose
    lyrics are a missing value are skipped (the original comment marks these
    as video), as are tracks without any description and tracks whose lyrics
    already equal the computed text.
    """
    _require_appscript()
    missing = k.missing_value
    # ``== True`` builds an appscript test expression; it is not a Python
    # truth test and must stay as written.
    podcasts = iTunes.tracks[its.podcast == True]  # noqa: E712
    ids = podcasts.id()
    descs = podcasts.description()
    longdescs = podcasts.long_description()
    lyricses = podcasts.lyrics()
    for id_, desc, longdesc, lyrics in izip(ids, descs, longdescs, lyricses):
        if lyrics == missing:  # video
            continue
        text = _combine_descriptions(
            None if desc == missing else desc,
            None if longdesc == missing else longdesc)
        if text is None or lyrics == text:
            continue
        iTunes.tracks[its.id == id_].lyrics.set(text)


def iTunes_main_pane():
    """Return the System Events reference to the main pane of the iTunes window."""
    _require_appscript()
    iTunes_window = app(u'System Events').application_processes[u'iTunes'].windows[u'iTunes']
    return iTunes_window.splitter_groups[1].scroll_areas[1]


def wait_for_podcast_update(iTunes):
    """Reveal the Podcasts playlist and block until no download is pending.

    Polls the status texts in the main pane every ``POLL_INTERVAL`` seconds
    until none of them reads 'downloading' or 'queued for download'.
    """
    _require_appscript()
    # show 'Podcasts'
    iTunes.playlists[its.special_kind == AEEnum('kSpP')].reveal()
    podcast_status = iTunes_main_pane().outlines[1].rows.static_texts[1].value
    pending = (u'downloading', u'queued for download')
    while any(status in pending for status in podcast_status.get()):
        time.sleep(POLL_INTERVAL)


def wait_for_iPod_update(iTunes):
    """Reveal the iPod and block until its 'Sync' button is enabled.

    A CommandError raised while reading the button state is ignored and the
    state is polled again after ``POLL_INTERVAL`` seconds.
    """
    _require_appscript()
    # show iPod
    iTunes.sources[its.kind == AEEnum('kPod')].sources[1].library_playlists[1].reveal()
    sync_enabled = iTunes_main_pane().buttons[u'Sync'].enabled
    while True:
        try:
            # Explicit comparison: only a real True ends the wait.
            if sync_enabled.get() == True:  # noqa: E712
                return
        except CommandError:
            pass
        time.sleep(POLL_INTERVAL)


def main():
    """Sync the iPod, update podcasts, copy descriptions to lyrics, sync again."""
    _require_appscript()
    iTunes = app('iTunes')
    _log('Synchronizing iPod...')
    iTunes.update()
    wait_for_iPod_update(iTunes)
    _log('Updating podcasts...')
    iTunes.updateAllPodcasts()
    wait_for_podcast_update(iTunes)
    _log('Copying podcast descriptions to lyrics...')
    podcasts_to_lyrics(iTunes)
    _log('Synchronizing iPod...')
    iTunes.update()
    wait_for_iPod_update(iTunes)


if __name__ == '__main__':
    main()