Changeset 627
- Timestamp: 08/13/10 00:02:12 (14 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Update Podcasts/update_podcasts.py
r625 r627
   4    4   from appscript import *
   5    5   from aem import AEEnum
   6      - from itertools import izip
   7      - import htmlentitydefs
   8      - import re
   9    6   import sys
  10    7   import time
  11      -
  12      - # based on <http://sebsauvage.net/python/snyppets/>
  13      - RE_ENTITY_CHR = re.compile(u'&(%s);' % u'|'.join(htmlentitydefs.name2codepoint))
  14      - RE_ENTITY_DEC = re.compile(u'&#(\d+);')
  15      - RE_ENTITY_HEX = re.compile(u'&#x(\w+);')
  16      - def decode_entities(s):
  17      - def entity2char(m):
  18      - entity = m.group(1)
  19      - if entity in htmlentitydefs.name2codepoint:
  20      - return unichr(htmlentitydefs.name2codepoint[entity])
  21      - return u' ' # Unknown entity: We replace with a space.
  22      - replaced = 0
  23      - s, n = RE_ENTITY_CHR.subn(entity2char, s)
  24      - replaced += n
  25      - s, n = RE_ENTITY_DEC.subn(lambda x: unichr(int(x.group(1))), s)
  26      - replaced += n
  27      - s, n = RE_ENTITY_HEX.subn(lambda x: unichr(int(x.group(1),16)), s)
  28      - replaced += n
  29      - return s, replaced
  30      -
  31      - OPEN_P = r'<[Pp][^>]*>'
  32      - RE_OPEN_P = re.compile(OPEN_P)
  33      - RE_MATCHED_P = re.compile(OPEN_P + r'(?!<[Pp])(.*)</[Pp]>')
  34      - RE_TOO_MANY_CR = re.compile(r'\s*\n\s*\n\s*\n+', re.U)
  35      - RE_TAG = re.compile(r'<[^>]*>')
  36      - RE_BR = re.compile(r'<[Bb][Rr][^>]*>\s*', re.U)
  37      - RE_WHITESPACE = re.compile(r'\s+', re.U)
  38      - RE_LEADING_WHITESPACE = re.compile(r'^\s+(.*)', re.U)
  39      - RE_TRAILING_WHITESPACE = re.compile(r'(.*)\s+$', re.U)
  40      -
  41      - def html_to_text(s):
  42      - s, replaced = decode_entities(s)
  43      - s = s.replace('\r\n', '\n')
  44      - s = s.replace('\r', '\n')
  45      - if replaced > 0 or RE_TAG.search(s): # HTML
  46      - s = RE_WHITESPACE.sub(' ', s)
  47      - s = RE_MATCHED_P.sub(r'\1\n\n', s)
  48      - s = RE_OPEN_P.sub(r'\n\n', s)
  49      - s = RE_BR.sub(r'\n', s)
  50      - s = RE_TAG.sub('', s)
  51      - s = RE_TOO_MANY_CR.sub(r'\n\n', s)
  52      - s = RE_LEADING_WHITESPACE.sub(r'\1', s)
  53      - s = RE_TRAILING_WHITESPACE.sub(r'\1', s)
  54      - return s
  55      -
  56      - def words(s):
  57      - return set(RE_WHITESPACE.split(s))
  58      -
  59      - def podcasts_to_lyrics(iTunes):
  60      - podcasts = iTunes.tracks[its.podcast == True]
  61      -
  62      - ids = podcasts.id()
  63      - descs = podcasts.description()
  64      - longdescs = podcasts.long_description()
  65      - lyricses = podcasts.lyrics()
  66      -
  67      - for id_, desc, longdesc, lyrics in izip(ids, descs, longdescs, lyricses):
  68      - if lyrics == k.missing_value: # video
  69      - continue
  70      -
  71      - if longdesc == k.missing_value:
  72      - if desc == k.missing_value:
  73      - continue
  74      - longdesc = desc
  75      - else:
  76      - longdesc = html_to_text(longdesc)
  77      - if desc != k.missing_value:
  78      - desc = html_to_text(desc)
  79      - if (desc not in longdesc and
  80      - len(words(desc) - words(longdesc)) > 1):
  81      - longdesc = '%s\n\n%s' % (desc, longdesc)
  82      -
  83      - if lyrics == longdesc:
  84      - continue
  85      -
  86      - iTunes.tracks[its.id == id_].lyrics.set(longdesc)
  87    8
  88    9   def iTunes_main_pane():
   …    …
 119   40   iTunes.updateAllPodcasts()
 120   41   wait_for_podcast_update(iTunes)
 121      - print >> sys.stderr, 'Copying podcast descriptions to lyrics...'
 122      - podcasts_to_lyrics(iTunes)
 123   42   print >> sys.stderr, 'Synchronizing iPod...'
 124   43   iTunes.update()
Note: See TracChangeset for help on using the changeset viewer.