00001
00002
00003 """
This Python script performs movie data lookups against the popular
www.imdb.com website using the IMDbPY package.

IMDbPY is hosted at http://imdbpy.sourceforge.net/; you need to install
it to make this script work.
00009
00010 This wrapper script is written by
00011 Pekka Jääskeläinen (gmail: pekka.jaaskelainen).
00012 """
00013
00014 import sys
00015 import optparse
00016 import re
00017 import socket
00018
00019 try:
00020 import imdb
00021 except ImportError:
00022 print "You need to install the IMDbPy library "\
00023 "from (http://imdbpy.sourceforge.net/?page=download)"
00024 sys.exit(1)
00025
# The MythTV bindings are optional: when they cannot be imported or the
# MythTV() object cannot be created, `mythtv` is None and
# VideoMetadata.__init__ falls back to its built-in episode title format.
# NOTE(review): the bare except is kept as-is; presumably the bindings can
# fail in ways other than ImportError (e.g. no reachable database) --
# confirm before narrowing it.
try:
    from MythTV import MythTV
    mythtv = MythTV()
except:
    mythtv = None
00031
def detect_series_title(search_string):
    """
    Detects a series episode title.

    Currently the following formats are detected:
    "Sopranos Season 1 Episode 2"
    "Sopranos S1E2"
    "Sopranos S1 E2"
    "Sopranos 1x2"
    "Sopranos - 1x2"
    "Sopranos 612"

    Matching is done against the lowercased search string, so the returned
    title is always lowercase.

    Returns a 3-tuple (title, season, episode) of strings, with surrounding
    whitespace stripped from the title, or (None, None, None) when the
    search string does not look like an episode reference.
    """
    # Every piece is a raw string: the former non-raw continuation pieces
    # only worked because Python passes unknown escapes ("\s", "\d") through.
    regexps = [
        # "<title> [-] S<n>E<m>", "<title> season <n> episode <m>",
        # "<title> <n>x<m>"
        re.compile(r"((?P<title>.+?)(-)?)?(\s*)"
                   r"(s|(season\s)|\s)(?P<season>\d+)"
                   r"\s*(e|(episode\s)|x)(?P<episode>\d+)"),
        # "<title> 612": one season digit followed by two episode digits.
        re.compile(r"((?P<title>.+)(-)?)?\s+"
                   r"(?P<season>\d)(?P<episode>\d\d)(?!\d)\s*"),
    ]

    lowered = search_string.lower()
    for exp in regexps:
        m = exp.match(lowered)
        if m is None:
            continue

        title = m.group('title')
        season = m.group('season')
        episode = m.group('episode')
        if title is None or season is None or episode is None:
            continue

        # The title group can swallow the blank in front of a "-" separator
        # ("sopranos - 1x2" captures "sopranos "), so trim it.
        title = title.strip()
        if not title:
            # Nothing but whitespace in front of the episode marker.
            continue

        return (title, season, episode)

    return (None, None, None)
00061
00062
def episode_search(title, season, episode):
    """
    Searches the IMDb for an exact TV-serie episode match.

    title is the series title to search for; season and episode are the
    season and episode numbers (anything int() accepts).

    Returns a list of 3-tuples [imdb id, title, year] for possible matches.
    When an exact episode is found, only that episode is returned. Otherwise
    the list holds the series for which no episode listing was available
    (possibly none at all). A year that is missing or not numeric is
    reported as 0.
    Rest of the data need to be fetched separately with fetch_metadata().
    """
    def year_of(item):
        # Search results and episodes do not always carry a usable year.
        try:
            return int(item['year'])
        except (KeyError, ValueError, TypeError):
            return 0

    matches = []
    imdb_access = imdb.IMDb()
    series = imdb_access.search_movie(title.encode("ascii", 'replace'))
    season = int(season)
    episode = int(episode)

    for serie in series:
        if serie.get('kind') != 'tv series':
            continue

        # Fetch episode info.
        imdb_access.update(serie, 'episodes')
        episodes = serie.get('episodes')
        if episodes is None:
            # No episode listing: offer the series itself as a candidate.
            matches.append([imdb_access.get_imdbID(serie),
                            serie['title'], year_of(serie)])
            continue

        try:
            ep = episodes[season][episode]
        except (KeyError, IndexError, TypeError):
            # The requested season/episode is not listed for this series.
            continue

        # Found an exact episode match, return that match only.
        return [[imdb_access.get_imdbID(ep),
                 "%s Season %d Episode %d" %
                 (ep['series title'], season, episode),
                 year_of(ep)]]

    return matches
00104
def title_search(search_string):
    """
    Returns a list of 3-tuples [imdb id, title, year] for possible matches.

    If the search string looks like a series episode reference (see
    detect_series_title()), the result of episode_search() is returned.
    Otherwise a movie search is done:

    - titles equal to the search string (ignoring case and a leading or
      trailing article) are preferred and returned sorted by year, oldest
      first, entries without a usable year last;
    - if there is no such title, the first search results are returned in
      the order IMDb gave them.

    At most four entries are returned. A year that is missing or not
    numeric is reported as 1901. Returns None when the movie search gives
    no results at all.
    """
    (title, season, episode) = detect_series_title(search_string)

    if title is not None and season is not None and episode is not None:
        return episode_search(title, season, episode)

    imdb_access = imdb.IMDb()
    movies = imdb_access.search_movie(search_string.encode("ascii", 'ignore'))

    if not movies:
        return None

    def removeArticles(string):
        # Lowercase first so that "the matrix" and "The Matrix" are treated
        # alike, then drop "the "/"an "/"a " prefixes and ", the" etc.
        # suffixes (the form "Matrix, The").
        string = string.lower()
        for article in ("the", "an", "a"):
            prefix = article + " "
            suffix = ", " + article
            if string.startswith(prefix):
                string = string[len(prefix):]
            if string.endswith(suffix):
                string = string[:-len(suffix)]
        return string

    def knownYear(movie):
        # Returns the year as an int, or None when it is missing/unusable.
        try:
            return int(movie['year'])
        except (KeyError, ValueError, TypeError):
            return None

    def yearSortKey(movie):
        # Ascending by year; entries without a year go to the end.
        year = knownYear(movie)
        return (year is None, year or 0)

    wanted = removeArticles(search_string)
    exact_titled = [imdb_movie for imdb_movie in movies
                    if removeArticles(imdb_movie['title']) == wanted]

    if len(exact_titled) == 0:
        # No exact title match, fall back to the first search results.
        sorted_movies = movies[0:4]
    else:
        # list.sort() is stable, so equal years keep the order IMDb gave.
        exact_titled.sort(key=yearSortKey)
        sorted_movies = exact_titled[0:4]

    results = []
    for m in sorted_movies:
        year = knownYear(m)
        if year is None:
            year = 1901
        results.append([imdb_access.get_imdbID(m), m['title'], year])
    return results
00167
def find_poster_url(imdb_id):
    """
    Looks up the cover URL for the given IMDb id.

    The 'cover url' of the title itself is used when it is present and not
    None. Otherwise, if the title is of kind 'episode', the 'cover url' of
    the series it is an episode of is used.

    Returns the URL, or None when no cover URL is found.
    """
    access = imdb.IMDb()
    entry = access.get_movie(imdb_id)
    access.update(entry)

    if 'cover url' in entry.keys():
        poster = entry['cover url']
        if poster is not None:
            return poster

    if entry['kind'] != 'episode':
        return None

    # Episodes without a cover of their own borrow the cover of the series.
    parent = entry['episode of']
    access.update(parent)
    if 'cover url' in parent.keys():
        return parent['cover url']
    return None
00183
def poster_search(imdb_id):
    """
    Prints the poster URL found by find_poster_url() for the given IMDb id.

    Nothing is printed when no URL is found.
    """
    poster = find_poster_url(imdb_id)
    if poster is None:
        return
    print(poster)
00188
class VideoMetadata:
    """
    Holds the metadata of one video and renders it as 'Name:value' lines.
    """

    # Title format for series episodes, used when no
    # 'VideoEpisodeTitleFormat' setting can be read from MythTV.
    DEFAULT_EPISODE_TITLE_FORMAT = \
        '%(series_title)s S%(season)02d E%(episode)02d %(episode_title)s'

    # Series episode fields; only used when series_episode is true.
    series_episode = False
    series_title = ""
    season = None
    episode = None
    # Declared here so that toMetadataString() also works for an episode
    # whose title was never set.
    episode_title = ""

    # Fields left at None produce no output line in toMetadataString().
    title = ""
    runtime = None
    year = None
    directors = None
    plot = None
    cast = None
    rating = None
    mpaa_rating = None
    genres = None
    countries = None
    akas = None

    def __init__(self):
        """
        Reads the episode title format from the MythTV settings if the
        module-level `mythtv` object is available, else uses the default.
        """
        self.episode_title_format = None
        if mythtv is not None:
            self.episode_title_format = mythtv.db.getSetting(
                'VideoEpisodeTitleFormat', socket.gethostname())
        if self.episode_title_format is None:
            self.episode_title_format = self.DEFAULT_EPISODE_TITLE_FORMAT

    def toMetadataString(self):
        """
        Returns the metadata as a unicode string of 'Name:value' lines, each
        terminated by a newline. Fields whose value is None are left out.

        Values may be unicode strings, UTF-8 encoded byte strings or any
        other object that can be converted to text.
        """
        def toText(value):
            if isinstance(value, type(u"")):
                return value
            if hasattr(value, "decode"):
                # A byte string. Decode it explicitly: the implicit ASCII
                # decoding done by string concatenation fails on any
                # non-ASCII character.
                return value.decode("utf8")
            return u"%s" % (value,)

        def createMetadataLine(keyName, value):
            if value is None:
                return u""
            return u"%s:%s\n" % (keyName, toText(value))

        lines = []
        if self.series_episode and self.season is not None and \
           self.episode is not None:
            episode_title = toText(self.episode_title_format) % {
                'series_title': self.series_title,
                'season': int(self.season),
                'episode': int(self.episode),
                'episode_title': self.episode_title,
            }
            lines.append(createMetadataLine('Title', episode_title))
        else:
            lines.append(createMetadataLine("Title", self.title))

        lines.append(createMetadataLine("Runtime", self.runtime))
        lines.append(createMetadataLine('Year', self.year))
        if self.directors:
            # Only the first listed director is reported.
            lines.append(createMetadataLine("Director", self.directors[0]))
        lines.append(createMetadataLine("Plot", self.plot))
        lines.append(createMetadataLine("Cast", self.cast))
        lines.append(createMetadataLine('UserRating', self.rating))
        lines.append(createMetadataLine('MovieRating', self.mpaa_rating))
        lines.append(createMetadataLine('Genres', self.genres))
        lines.append(createMetadataLine('Countries', self.countries))

        if self.akas is not None:
            lines.append(createMetadataLine(
                'AKA', u", ".join([toText(aka) for aka in self.akas])))

        return u"".join(lines)
00250
def fetch_metadata(imdb_id):
    """
    Fetches metadata for the given IMDb id.

    For a title of kind 'episode' the series title, season, episode number
    and episode title are filled in, and the runtime is first taken from the
    series the episode belongs to. For any other kind the plain title is
    filled in.

    Returns a VideoMetadata object.
    """
    metadata = VideoMetadata()

    imdb_access = imdb.IMDb()
    movie = imdb_access.get_movie(imdb_id)
    imdb_access.update(movie)

    def metadataFromField(key, default=None, m=movie):
        # Returns field `key` of `m` as a UTF-8 encoded string, or `default`
        # when the field is missing or None.
        searchKey = key.lower()
        if searchKey not in m.keys():
            return default
        value = m[searchKey]
        if value is None:
            return default
        return unicode(value).encode("utf8")

    def metadataFromFirst(key, default=None, m=movie):
        # Returns the list field `key` of `m` as a UTF-8 encoded string with
        # the items joined by ',', or `default` when the field is missing
        # or empty.
        searchKey = key.lower()
        if searchKey not in m.keys():
            return default

        value = m[searchKey]
        if not value:
            return default
        if len(value) > 1:
            return ','.join(value).encode("utf8")
        return value[0].encode("utf8")

    if movie['kind'] == 'episode':
        metadata.series_episode = True
        if 'series title' in movie.keys():
            metadata.series_title = movie['series title']
        if 'season' in movie.keys():
            metadata.season = movie['season']
        if 'episode' in movie.keys():
            metadata.episode = movie['episode']
        if 'title' in movie.keys():
            metadata.episode_title = unicode(movie['title'])
        else:
            # Always set it: the episode title format refers to it.
            metadata.episode_title = u""

        if 'episode of' in movie.keys():
            series = movie['episode of']
            imdb_access.update(series)
            metadata.runtime = metadataFromFirst('runtimes', metadata.runtime,
                                                 series)
    else:
        title = metadataFromField('title')
        if title is not None:
            metadata.title = title.decode("utf8")

    metadata.year = metadataFromField('year')

    if 'director' in movie.keys():
        directors = movie['director']
        if directors is not None:
            metadata.directors = directors

    # Work on a copy so that appending the outline below does not modify
    # the list held by the movie object.
    plots = []
    if 'plot' in movie.keys() and movie['plot'] is not None:
        plots = list(movie['plot'])
    outline = movie.get('plot outline')
    if outline:
        plots.append("Outline::" + outline)

    # Find the shortest plot.
    shortest_found = None
    for plot in plots:
        parts = plot.split("::")
        # NOTE(review): the part after "::" is assumed to be the plot text,
        # as before -- confirm against the installed IMDbPY version.
        if len(parts) > 1:
            text = parts[1]
        else:
            # No separator at all: use the entry as it is.
            text = parts[0]
        if shortest_found is None or len(text) < len(shortest_found):
            shortest_found = text
    metadata.plot = shortest_found

    cast = movie.get('cast')
    if cast:
        metadata.cast = ", ".join([person['name'] for person in cast])
    else:
        metadata.cast = ""

    metadata.rating = metadataFromField('rating', metadata.rating)
    metadata.mpaa_rating = metadataFromField('mpaa', metadata.mpaa_rating)
    # A runtime of the episode itself overrides the one of its series.
    metadata.runtime = metadataFromFirst('runtimes', metadata.runtime)
    metadata.genres = metadataFromFirst('genres', metadata.genres)
    metadata.countries = metadataFromFirst('countries', metadata.countries)

    akas = movie.get('akas')
    if akas is not None:
        metadata.akas = akas

    return metadata
00355
def metadata_search(imdb_id):
    """
    Fetches the metadata for the given IMDb id with fetch_metadata() and
    returns it rendered by toMetadataString().

    Returns None if fetch_metadata() returned None.
    """
    record = fetch_metadata(imdb_id)
    if record is None:
        return None
    return record.toMetadataString()
00360
def parse_meta(meta, key):
    """
    Picks the value of `key` out of a string of 'key:value' lines.

    Returns the text after "key:" on the first line starting with it, with
    surrounding whitespace stripped, or None if there is no such line.
    """
    prefix = key + ":"
    for entry in meta.split("\n"):
        if not entry.startswith(prefix):
            continue
        return entry[len(prefix):].strip()
    return None
00367
def main():
    """
    Command line entry point.

    Parses the options and runs the requested action (version, info, movie
    search, poster search or metadata search); prints the usage help when
    no action is given. Search output is written UTF-8 encoded. Always
    leaves through sys.exit(0).
    """
    p = optparse.OptionParser()
    p.add_option('--version', '-v', action="store_true", default=False,
        help="display 1-line describing name, version, author etc")
    p.add_option('--info', '-i', action="store_true", default=False,
        help="display 1-line of info describing what makes this script unique")
    p.add_option('--movie_search', '-M', metavar='QUERY_STRING',
        help="displays a list of 'movieid:Movie Title' lines that may be "\
        "possible matches for the query. The lines are ranked "\
        "by descending priority.")
    p.add_option('--poster_search', '-P', metavar='IMDB_ID',
        help="displays a list of URL's to movie posters. The lines are "\
        "ranked by descending value.")
    p.add_option('--metadata_search', '-D', metavar='IMDB_ID',
        help="displays a list of 'name:value' pairs describing metadata "\
        "for the given movie at the IMDb id.")
    options, arguments = p.parse_args()

    if options.version:
        print("MythVideo IMDbPy wrapper v1.0 (c) Pekka Jääskeläinen 2006")
        sys.exit(0)

    if options.info:
        print("\n"
              "Uses the IMDbPy package to fetch the data, thus externalizes the actual\n"
              "parsing of IMDb data to another project, hopefully reducing the maintenance\n"
              "burden in the future, in addition supports fetching data for TV-series\n"
              "episodes.")
        sys.exit(0)

    if options.movie_search is not None:
        results = title_search(options.movie_search.decode("utf8"))
        # title_search() returns None when nothing was found.
        for result in results or []:
            line = u"%s:%s (%d)" % (result[0], result[1], result[2])
            # Encode explicitly, like the metadata output below: titles are
            # unicode and cannot be printed to a pipe when they contain
            # non-ASCII characters.
            print(line.encode("utf8"))
    elif options.poster_search is not None:
        poster_search(options.poster_search)
    elif options.metadata_search is not None:
        metadata = metadata_search(options.metadata_search)
        if metadata is not None:
            print(metadata.encode("utf8"))
    else:
        p.print_help()
    sys.exit(0)
00409
# Run the command line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()