Changeset 492
- Timestamp:
- 02/17/09 10:28:37 (16 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Update Podcasts/update_podcasts.py
(Columns: line number in r491, line number in r492, "+" for lines added in r492. Leading indentation of the code was lost in extraction and is not reproduced.)

r491  r492
  34    34    RE_TOO_MANY_CR = re.compile(r'\s*\n\s*\n\s*\n+', re.U)
  35    35    RE_TAG = re.compile(r'<[^>]*>')
        36  + RE_BR = re.compile(r'<[Bb][Rr][^>]*>\s*', re.U)
  36    37    RE_WHITESPACE = re.compile(r'\s+', re.U)
  37    38    RE_LEADING_WHITESPACE = re.compile(r'^\s+(.*)', re.U)
   …     …
  46    47    s = RE_MATCHED_P.sub(r'\1\n\n', s)
  47    48    s = RE_OPEN_P.sub(r'\n\n', s)
        49  + s = RE_BR.sub(r'\n', s)
  48    50    s = RE_TAG.sub('', s)
  49    51    s = RE_TOO_MANY_CR.sub(r'\n\n', s)
   …     …
  51    53    s = RE_TRAILING_WHITESPACE.sub(r'\1', s)
  52    54    return s
        55  +
        56  + def words(s):
        57  + return set(RE_WHITESPACE.split(s))
  53    58
  54    59    def podcasts_to_lyrics(iTunes):
   …     …
  70    75    else:
  71    76    longdesc = html_to_text(longdesc)
        77  + if desc != k.missing_value:
        78  + desc = html_to_text(desc)
        79  + if (desc not in longdesc and
        80  + len(words(desc) - words(longdesc)) > 1):
        81  + longdesc = '%s\n\n%s' % (desc, longdesc)
  72    82
  73    83    if lyrics == longdesc:
Note:
See TracChangeset
for help on using the changeset viewer.