MediaGrabber

From AwkwardTV
Revision as of 19:23, 23 May 2007 by Konfoo (talk | contribs)
Jump to: navigation, search

MediaGrabber automatically grabs cover images and metadata for your media files. Just point it at the root directory of your media collection and it will scan all subdirectories and download covers and movie/TV/HD DVD/etc. info for every video file it finds.

You can use it with ATVFiles, and even other platforms such as Vista Media Center's My Videos.

This script can be run on any platform with a PHP interpreter (Windows, Linux, OS X, or directly on the Apple TV).

e.g. php mediagrabber.php c:\temp\mymovies DVD

If you want to contribute via paypal as a way to say thank you, that would be appreciated. :)

Code

  • Paste this into a file called mediagrabber.php.
  • Download a version of PHP
  • Place PHP and this script on your media storage server or the AppleTV
  • Run the script with no parameters to see the usage text, i.e. php mediagrabber.php
  • Add command line parameters as required
  • NOTE: Mediawiki breaks the code - download the php code @ http://www.datacast.biz/mediagrabber/ (rightclick and save as)
  • An executable for windows users is also available at http://www.datacast.biz/mediagrabber/
<?php
###############################################################################
# MediaGrabber
# v1.5
###############################################################################
# License: Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported
#
# The license is available @ http://creativecommons.org/licenses/by-nc-sa/3.0/
# You may use this software provided you credit the author and obey the license.
# The author takes no responsibility for misuse of this software, and collects 
# no fee for its use.
#
# Contact: konfoo at gmail dott com
# Version string printed in the usage text and in the start-up banner.
$ver="v1.5";
# NOTES
# - Make sure your version of PHP allows for fopen() to remote urls
#   If not, all your cover downloads will fail
#
# Changelog
# v1.5 (kon) - now retrieves XML metadata
#              renamed to mediagrabber
#              changed license to better reflect intended non-commercial use
# v1.4 (kon) - fixed file extension bs
# v1.3 (kon) - fixed case sensitive bug
# v1.2 (kon) - fixed stripping bug
# v1.1 (kon) - added timestamping, usage
# v1.0 (kon) - initial release
###############################################################################
# directoryToArray
# recursively convert a dir to an array
###############################################################################
function directoryToArray($directory, $recursive) 
{
    $array_items = array();
    if ($handle = opendir($directory)) 
    {
        while (false !== ($file = readdir($handle))) 
        {
            if ($file != "." && $file != "..") 
            {
                if (is_dir($directory. "/" . $file))
                {
                    if($recursive) 
                    {
                        $array_items = array_merge($array_items, 
                            directoryToArray($directory. "/" . $file, 
                            $recursive));
                    }
                    $file = $directory . "/" . $file;
                    $array_items[] = $file;
                } 
                else 
                {
                    $file = $directory . "/" . $file;
                    $array_items[] = $file;
                }
            }
        }
        closedir($handle);
    }
    return $array_items;
}
###############################################################################
# file_extension
# Return the extension of a file name or path, without the leading dot.
#
# $filename - file name or path to inspect
#
# Returns the text after the last dot of the final path component, or an
# empty string when there is no extension. Asking pathinfo() for the
# extension only avoids reading a missing 'extension' array key.
###############################################################################
function file_extension($filename)
{
    return pathinfo($filename, PATHINFO_EXTENSION);
}
###############################################################################
# stripDirectory
# Keep only the entries of a directory array that look like video files.
#
# $directory      - not used by this function
# $directoryArray - array of paths, e.g. the result of directoryToArray()
#
# Returns a new array holding the paths whose extension (compared without
# regard to case) is one of mpg, avi, vob, mp4, mkv, mpeg, mpeg4 or m4v.
###############################################################################
function stripDirectory($directory, $directoryArray) 
{
    $mediaExts = array('mpg', 'avi', 'vob', 'mp4', 'mkv', 'mpeg', 'mpeg4',
                       'm4v');
    $kept = array();
    foreach ($directoryArray as $path)
    {
        $ext = strtolower(file_extension($path));
        if (in_array($ext, $mediaExts, true))
        {
            $kept[] = $path;
        }
    }
    return $kept;
}
###############################################################################
# stripExts
# Remove multi-part markers (cd1, Part 2, 1of3, "2 of 4", ...) from a title
# and turn dashes and underscores into spaces.
#
# $val - title text, normally a file name without its extension
#
# Returns the cleaned title with trailing spaces removed. Markers are matched
# literally and case-sensitively, in the order listed below.
###############################################################################
function stripExts($val)
{
    $partMarkers = array(
        # 'cd4' was previously missing: 'CD4' was listed twice instead
        'cd1', 'cd2', 'cd3', 'cd4',
        'CD1', 'CD2', 'CD3', 'CD4',
        'part 1', 'part 2', 'part 3', 'part 4', 'part 5', 'part 6',
        'Part 1', 'Part 2', 'Part 3', 'Part 4', 'Part 5', 'Part 6',
        '1of1',
        '1of2', '2of2',
        '1of3', '2of3', '3of3',
        '1of4', '2of4', '3of4', '4of4',
        '1 of 1',
        '1 of 2', '2 of 2',
        '1 of 3', '2 of 3', '3 of 3',
        '1 of 4', '2 of 4', '3 of 4', '4 of 4'
    );
    # with an array of needles str_replace() applies them one after another
    $val = str_replace($partMarkers, "", $val);
    $val = str_replace(array('-', '_'), ' ', $val);
    return rtrim($val, "\x20");
}
###############################################################################
# stripFileeext
# Strip the file extension (the last dot and everything after it) from a
# file name or path.
#
# $fileName - file name or path
#
# Returns the name without its extension. The input is returned unchanged
# when it has no dot, or when the last dot sits in a directory component
# (i.e. a '/' or '\' follows it), so "my.dir/movie" is not cut at "my".
###############################################################################
function stripFileeext($fileName)
{
    $dotPos = strrpos($fileName, '.');
    if ($dotPos === false)
    {
        # no dot at all: nothing to strip
        return $fileName;
    }
    $slashPos = strrpos($fileName, '/');
    $backslashPos = strrpos($fileName, '\\');
    if (($slashPos !== false && $slashPos > $dotPos)
        || ($backslashPos !== false && $backslashPos > $dotPos))
    {
        # the dot belongs to a directory name, not to the file itself
        return $fileName;
    }
    return substr($fileName, 0, $dotPos);
}
###############################################################################
# getMovies
# Derive a title for every path in a file list.
#
# $fileList - array of file paths
#
# Returns an array, in the same order as $fileList, holding each path's base
# name with its ".<extension>" suffix removed.
###############################################################################
function getMovies($fileList)
{
    $titles = array();
    foreach ($fileList as $path)
    {
        $suffix = "." . file_extension($path);
        $titles[] = basename($path, $suffix);
    }
    return $titles;
}
###############################################################################
# fetchUrlasstring
# Read the whole resource behind a URL (or local path) into a string.
#
# $url - location handed to file_get_contents()
#
# Returns whatever file_get_contents() returns: the contents as a string, or
# false when the read fails.
###############################################################################
function fetchUrlasstring($url)
{
    return file_get_contents($url);
}
###############################################################################
# download
# Copy the resource behind a URL to a local file, 1024 bytes at a time.
#
# $file_source - URL to read; HTML entities are decoded and spaces are
#                replaced by %20 before it is opened
# $file_target - path of the file to write (overwritten when it exists)
#
# Returns true when everything was copied, false when the source or target
# could not be opened or a read or write failed. Both handles are closed on
# every path.
###############################################################################
function download($file_source, $file_target)
{
    $file_source = str_replace(' ', '%20', html_entity_decode($file_source));
    if (file_exists($file_target)) 
    {
        chmod($file_target, 0777); 
    }
    $rh = fopen($file_source, 'rb');
    if ($rh === FALSE) 
    { 
        return false; 
    }
    $wh = fopen($file_target, 'wb');
    if ($wh === FALSE) 
    { 
        # do not leak the already opened source handle
        fclose($rh);
        return false; 
    }
    $copied = true;
    while (!feof($rh))
    {
        $chunk = fread($rh, 1024);
        # a failed read would otherwise never reach end-of-file
        if ($chunk === FALSE || fwrite($wh, $chunk) === FALSE) 
        { 
            $copied = false;
            break;
        }
    }
    fclose($rh);
    fclose($wh);
    return $copied;
}
###############################################################################
# writeFile
# Write a string to a file, replacing any previous contents.
#
# $contents    - data to write
# $file_target - path of the file to write; an existing file is first
#                chmod'ed to 0777
#
# Returns true on success, false when the file could not be opened or the
# write failed.
###############################################################################
function writeFile($contents,$file_target)
{
    if (file_exists($file_target)) 
    {
        chmod($file_target, 0777); 
    }
    $out = fopen($file_target, 'wb');
    if ($out === FALSE) 
    { 
        return false; 
    }
    $written = fwrite($out, $contents);
    fclose($out);
    return $written !== FALSE;
}
###############################################################################
# getCoverimage
# Search the remote site for a title and save the cover image of the first
# hit next to the media file.
#
# $fileName    - path of the media file; the cover is written to the same
#                path with the extension replaced by .jpg
# $movieName   - raw title; it is cleaned with stripExts() before searching
# $searchURL   - search URL prefix, the url-encoded title is appended
# $cPreURL     - cover URL part placed before the item id
# $cPosURL     - cover URL part placed after the item id
# $totalItems  - number of items being processed (progress display only)
# $currentItem - 1-based position of this item (progress display only)
#
# Prints one progress line. Returns nothing.
###############################################################################
function getCoverimage($fileName,$movieName,$searchURL,$cPreURL,$cPosURL,
                        $totalItems,$currentItem)
{
    $title=stripExts($movieName);
    print("[".date('h:i:s').'] "'.$title.'" ('.$currentItem
            .' of '.$totalItems.'): ');
    $searchResult=fetchUrlasstring($searchURL.urlencode($title));
    # The item id is the text between the first 'item_id=' and the next '&'.
    $itemId='';
    if ($searchResult)
    {
        $idPos=strpos($searchResult,'item_id=');
        if ($idPos!==false)
        {
            $idPos+=8; # strlen('item_id=')
            # Search behind the marker: starting in front of it could match
            # a '&' that precedes 'item_id=' and yield a negative length.
            $idEnd=strpos($searchResult,'&',$idPos);
            if ($idEnd!==false)
            {
                $itemId=substr($searchResult,$idPos,$idEnd-$idPos);
            }
        }
    }
    if (!$itemId)
    {
        # terminate the progress line instead of leaving it open
        print("Not Found\n");
        return;
    }
    $imageURL=$cPreURL.$itemId.$cPosURL;
    print('Grabbing Image... ');
    if (download($imageURL,stripFileeext($fileName).'.jpg'))
    {
        print("Done\n");
    }
    else
    {
        print("Error\n");
    }
}
###############################################################################
# html2Txt
# Reduce an HTML fragment to a single line of plain text.
#
# $document - HTML fragment
#
# Returns the text with all tags removed, every run of spaces, tabs, carriage
# returns and line feeds collapsed to one space, and leading and trailing
# whitespace trimmed. HTML entities are left as they are.
###############################################################################
function html2Txt($document)
{
    $text=strip_tags($document);
    # An explicit ASCII class is used so that bytes of multi-byte characters
    # are never treated as whitespace.
    $text=preg_replace('/[ \t\r\n]+/', ' ', $text);
    return trim($text);
}
###############################################################################
# mgXmlText
# Escape text for use as XML element content.
#
# $text - plain text, may already contain entities such as &amp; or &#149;
#
# Returns the text with '&', '<' and '>' escaped. Existing entities are not
# encoded a second time. The single-byte charset keeps the call
# byte-transparent, so input in any ASCII compatible encoding is accepted.
###############################################################################
function mgXmlText($text)
{
    return htmlspecialchars($text, ENT_NOQUOTES, 'ISO-8859-1', false);
}
###############################################################################
# mgExtractField
# Cut one labelled field out of an item page.
#
# $urlData     - HTML of the item page
# $label       - text that identifies the field
# $skip        - number of bytes after the start of $label at which the
#                search for $startMarker begins
# $startMarker - text directly in front of the field value
# $endMarker   - text directly behind the field value
#
# Returns the field value passed through html2Txt(), or false when the label
# or one of the markers is missing or the value is empty.
###############################################################################
function mgExtractField($urlData, $label, $skip, $startMarker, $endMarker)
{
    $labelPos=strpos($urlData,$label);
    if ($labelPos===false)
    {
        return false;
    }
    $searchFrom=$labelPos+$skip;
    if ($searchFrom>strlen($urlData))
    {
        # an offset behind the end of the data is rejected by strpos()
        return false;
    }
    $startPos=strpos($urlData,$startMarker,$searchFrom);
    if ($startPos===false)
    {
        return false;
    }
    $textPos=$startPos+strlen($startMarker);
    $endPos=strpos($urlData,$endMarker,$textPos);
    if ($endPos===false)
    {
        return false;
    }
    $text=html2Txt(substr($urlData,$textPos,$endPos-$textPos));
    if ($text==='')
    {
        return false;
    }
    return $text;
}
###############################################################################
# mgNameListXml
# Turn a ' &#149; ' separated list of names into an XML element.
#
# $tag  - name of the enclosing element, e.g. cast
# $text - the separated names, or false when the field was not found
#
# Returns "<tag>" with one <name> child per non-empty name, or an empty
# string when there is nothing to list.
###############################################################################
function mgNameListXml($tag, $text)
{
    if ($text===false)
    {
        return '';
    }
    $names='';
    foreach (explode(' &#149; ',$text) as $name)
    {
        if ($name!=="")
        {
            $names.="<name>".mgXmlText($name)."</name>\n";
        }
    }
    if ($names==='')
    {
        return '';
    }
    return '<'.$tag.'>'."\n".$names.'</'.$tag.'>'."\n";
}
###############################################################################
# createXMLMetadata
# Build an XML metadata document from the HTML of an item page and save it.
#
# $urlData     - HTML of the item page
# $fileName    - path of the XML file to write
# $movieName   - raw title, written as <title>; cleaned with stripExts() for
#                the progress line only
# $totalItems  - number of items being processed (progress display only)
# $currentItem - 1-based position of this item (progress display only)
# $mediaType   - not used; the media type is always written as "Movie"
#
# Fields that cannot be found in the page are left out of the document.
# Prints one progress line. Returns nothing.
###############################################################################
function createXMLMetadata($urlData,$fileName,$movieName,$totalItems,
                           $currentItem,$mediaType)
{
    print("[".date('h:i:s').'] "'.stripExts($movieName).'" ('.$currentItem
            .' of '.$totalItems.'): ');
    print('Writing XML Metadata... ');
    $metaData='<media type="Movie">'."\n";
    # the title comes from a file name and may contain '&' or '<'
    $metaData.='<title>'.mgXmlText($movieName).'</title>'."\n";

    $synopsis=mgExtractField($urlData,'>Synopsis<',10,'small3">','</td>');
    if ($synopsis!==false)
    {
        $metaData.='<description>'.mgXmlText($synopsis).'</description>'."\n";
    }

    # NOTE(review): the label is 10 bytes long but 13 are skipped, so a '> '
    # directly behind it is not matched - confirm against the page layout.
    $rating=mgExtractField($urlData,'Rating:</b',13,'> ','<');
    if ($rating!==false)
    {
        $metaData.='<rating>'.mgXmlText($rating).'</rating>'."\n";
    }

    $genreText=mgExtractField($urlData,'<b>Genre</b',11,'>: ','</td>');
    if ($genreText!==false)
    {
        $genres='';
        foreach (explode(', ',$genreText) as $genre)
        {
            if ($genre==="")
            {
                continue;
            }
            # the first genre that is written is flagged as the primary one
            $open=($genres==='') ? '<genre primary="true">' : '<genre>';
            $genres.=$open.mgXmlText($genre)."</genre>\n";
        }
        if ($genres!=='')
        {
            $metaData.='<genres>'."\n".$genres.'</genres>'."\n";
        }
    }

    $metaData.=mgNameListXml('cast',
        mgExtractField($urlData,'<b>Actors:</b',13,'>','</td>'));
    $metaData.=mgNameListXml('directors',
        mgExtractField($urlData,'<b>Directors:</b',13,'>','</td>'));
    $metaData.=mgNameListXml('producers',
        mgExtractField($urlData,'<b>Producers:</b',16,'>','<b>'));

    $released=mgExtractField($urlData,'<b>Release Date:</b',19,'> ','<');
    if ($released!==false)
    {
        $metaData.='<released>'.mgXmlText($released).'</released>'."\n";
    }

    $length=mgExtractField($urlData,'<b>Length:</b',13,'> ','<');
    if ($length!==false)
    {
        # the page gives minutes, the document stores minutes * 60; the cast
        # keeps text that is not a number from reaching the multiplication
        $minutes=(int)str_replace(" mins","",$length);
        if ($minutes>0)
        {
            $metaData.='<duration>'.($minutes*60).'</duration>'."\n";
        }
    }

    $metaData.='</media>';
    if (writeFile($metaData,$fileName))
    {
        print("Done\n");
    }
    else
    {
        print("Error writing metadata!\n");
    }
}
###############################################################################
# getXmlmetadata
# Search the remote site for a title, fetch the item page of the first hit
# and hand it to createXMLMetadata().
#
# $fileName    - path of the media file; the metadata is written to the same
#                path with the extension replaced by .xml
# $movieName   - raw title; it is cleaned with stripExts() before searching
# $searchURL   - search URL prefix, the url-encoded title is appended
# $metadataPre - item page URL prefix, the item id is appended
# $totalItems  - number of items being processed (progress display only)
# $currentItem - 1-based position of this item (progress display only)
# $mediaType   - passed through to createXMLMetadata()
#
# Prints one progress line. Returns nothing.
###############################################################################
function getXmlmetadata($fileName,$movieName,$searchURL,$metadataPre,
                        $totalItems,$currentItem,$mediaType)
{
    $title=stripExts($movieName);
    print("[".date('h:i:s').'] "'.$title.'" ('.$currentItem
            .' of '.$totalItems.'): ');
    $searchResult=fetchUrlasstring($searchURL.urlencode($title));
    # The item id is the text between the first 'item_id=' and the next '&'.
    $itemId='';
    if ($searchResult)
    {
        $idPos=strpos($searchResult,'item_id=');
        if ($idPos!==false)
        {
            $idPos+=8; # strlen('item_id=')
            # Search behind the marker: starting in front of it could match
            # a '&' that precedes 'item_id=' and yield a negative length.
            $idEnd=strpos($searchResult,'&',$idPos);
            if ($idEnd!==false)
            {
                $itemId=substr($searchResult,$idPos,$idEnd-$idPos);
            }
        }
    }
    if (!$itemId)
    {
        # terminate the progress line instead of leaving it open
        print("Not Found\n");
        return;
    }
    print('Grabbing URL... ');
    $itemPage=fetchUrlasstring($metadataPre.$itemId);
    # a failed fetch yields false, which must not be treated as page data
    if ($itemPage===false || $itemPage==="")
    {
        print("Error\n");
        return;
    }
    print("Done\n");
    createXMLMetadata($itemPage,
                       stripFileeext($fileName).'.xml',
                       $movieName,$totalItems,$currentItem,
                       $mediaType);
}
###############################################################################
# main
# Command line entry point.
#
# $argv[1] - directory that is scanned (recursively) for video files
# $argv[2] - search type that is sent to the remote site
#
# Without both arguments the usage text is printed and the script stops.
# Otherwise a cover image (.jpg) and a metadata file (.xml) are fetched for
# every video file that does not already have one.
###############################################################################
if (empty($argv[1])||empty($argv[2]))
{
    print("\nMediaGrabber ".$ver."\nAuthor: konfoo at gmail dott com\n\n");
    print("An automated cover downloader for HDDVD, DVD and other");
    print(" video content.\n");
    print("Retrieves cover images and associated XML metadata for video");
    print(" content.\n\n");
    print("License:\n");
    print("        Creative Commons Attribution-NonCommercial-ShareAlike");
    print(" 3.0 Unported\n\n");
    print("Terms:\n");
    print("        A copy of the License is available at:\n");
    print("        http://creativecommons.org/licenses/by-nc-sa/3.0/\n");
    print("        You may use this software provided you credit the\n");
    print("        author and obey the license. The author takes no\n");
    print("        responsibility for misuse of this software, and\n");
    print("        collects no fee for its use.\n\n");
    print("Usage:  MediaGrabber <target directory> <searchtype>\n");
    print("        MediaGrabber c:\\temp\\movies DVD\n");
    print("        MediaGrabber c:\\temp\\tvshows All\n\n");
    print("Supported searchtypes:\n");
    print("        All, DVD, HDDVD, Bluray, UMD, Music, VOD, User\n\n");
    print("Works with:\n");
    print("        AppleTV (ATVFiles)\n"); 
    print("        Media Center (My Videos)\n\n");
    print("Input:  folder/sub<...>/videotitle.<avi/mpg/mp4/vob/mpeg/...>\n");
    print("Output: folder/sub<...>/videotitle.jpg\n");
    print("Output: folder/sub<...>/videotitle.xml\n");
    die;
}
# PHP diagnostics are silenced from here on; failures are reported through
# the progress lines only.
error_reporting(0);
ini_set('default_socket_timeout', 120);    
print("[".date('h:i:s')."] MediaGrabber ".$ver."\n");
print("[".date('h:i:s')."] Author: konfoo at gmail dott com\n");
print("[".date('h:i:s')."] License: Creative Commons Attribution-"
      ."NonCommercial-ShareAlike.\n");
print("[".date('h:i:s')."] Run with no parameters for additional terms.\n");
print("[".date('h:i:s')."] Initializing\n");
# Random 14 digit id starting with 99. It is assembled from two six digit
# numbers because a 14 digit bound does not fit into a 32-bit integer.
# NOTE(review): presumably the site accepts any well-formed id - confirm.
$userID='99'.str_pad(mt_rand(0,555555),6,'0',STR_PAD_LEFT)
            .str_pad(mt_rand(0,555555),6,'0',STR_PAD_LEFT);
$searchString='http://www.dv'.'dempire.com/Exec/v4'.'_search_pre.asp?userid='
                .$userID
                .'&view=&pp=&site_id=4&search_type='.urlencode($argv[2])
                .'&media_type=0&search_string=';
$coverPre='http://images2.dv'.'dempire.com/gen/movies/';
$coverPos='h.jpg';
# the id was previously written without its '$' and never reached the URL
$metadataPre='http://www.dv'.'dempire.com/Exec/v4'.'_item.asp?userid='
                .$userID
                .'&item_id=';
print("[".date('h:i:s')."] Building list of files\n");
$recursiveDirlist=directoryToArray($argv[1],true);
print("[".date('h:i:s')."] Removing invalid entries and filetypes\n");
$fileList=stripDirectory($argv[1],$recursiveDirlist);
print("[".date('h:i:s')."] Building list of titles\n");
$movieList=getMovies($fileList);
$totalFiles=count($fileList);
print("[".date('h:i:s')."] Starting Cover Download...\n");
# $fileList and $movieList are built in the same order, so an index into one
# addresses the matching entry of the other.
foreach($fileList as $index => $mediaFile)
{
    $baseName=stripFileeext($mediaFile);
    if (file_exists($baseName.".jpg"))
    {
        print("[".date('h:i:s').'] Skipping Existing Cover "'
              .stripExts($movieList[$index]).'"'."\n");
    }
    else
    {
        getCoverimage($mediaFile,$movieList[$index],$searchString,
                        $coverPre,$coverPos,$totalFiles,$index+1);
    }
    if (file_exists($baseName.".xml"))
    {
        print("[".date('h:i:s').'] Skipping Existing Metadata "'
              .stripExts($movieList[$index]).'"'."\n");
    }
    else
    {
        getXmlmetadata($mediaFile,$movieList[$index],$searchString,
                        $metadataPre,$totalFiles,$index+1,$argv[2]);
    }
}
print("[".date('h:i:s')."] Finished!\n");
?>