PoshCode Logo PowerShell Code Repository

Get-WebVideoFile by Jan Egil Ring 5 years ago
embed code: <script type="text/javascript" src="http://PoshCode.org/embed/3399"></script>download | new post

Download video-files from the specified RSS-feed URL, based on HTML scraping and a regular expression for finding the download URL. Joel Bennett's Get-WebFile function from poshcode.org, which provides progress status during download, is used for downloading the files.

  1. <#
  2. .SYNOPSIS
  3. Download video-files from the specified RSS-feed URL, based on HTML scraping and a regular expression.
  4.  
  5. .DESCRIPTION
  6. Download video-files from the specified RSS-feed URL, based on HTML scraping and a regular expression for finding the download URL.
  7. Joel Bennett's Get-WebFile function from poshcode.org, which provides progress status during download, is used for downloading the files.
  8. The script was originally created for downloading wmv-files from Microsoft TechNet Edge (http://technet.microsoft.com/en-us/edge).
  9.  
  10. .PARAMETER RssUrl
  11. The URL for the RSS feed to process
  12.  
  13. .PARAMETER destination
  14. The destination-folder for the downloaded video files. If not specified, the downloaded files will be placed in the current user's Video-folder ($home\Videos).
  15.  
  16. .PARAMETER UseOriginalFileName
  17. Switch-parameter to specify usage of original filenames. If not specified the RSS title will be used as filename.
  18.  
  19. .PARAMETER UrlRegex
  20. A regular expression used to search for video URLs. If not specified a regular expression for finding wmv-files on TechNet Edge is used.
  21.  
  22. .EXAMPLE
  23. .\Get-WebVideoFile.ps1 -RssUrl "http://technet.microsoft.com/en-us/edge/SyndicationGetTopics/cc543196.aspx?field=Category&value=System Center 2012&ancestor=ff524487&version=MSDN.10"
  24.  
  25. .EXAMPLE
  26. .\Get-WebVideoFile.ps1 -Destination "C:\TechNet Edge Videos\" -RssUrl "http://technet.microsoft.com/en-us/edge/SyndicationGetTopics/cc543196.aspx?field=Category&value=System Center 2012&ancestor=ff524487&version=MSDN.10"
  27.  
  28. .EXAMPLE
  29. .\Get-WebVideoFile.ps1 -UseOriginalFileName -RssUrl "http://technet.microsoft.com/en-us/edge/SyndicationGetTopics/cc543196.aspx?field=Category&value=System Center 2012&ancestor=ff524487&version=MSDN.10"
  30.  
  31. .EXAMPLE
  32. .\Get-WebVideoFile.ps1 -Verbose -RssUrl "http://technet.microsoft.com/en-us/edge/SyndicationGetTopics/cc543196.aspx?field=Category&value=System Center 2012&ancestor=ff524487&version=MSDN.10"
  33.  
  34.  
  35. .NOTES
  36.  
  37.  Name: Get-WebVideoFile.ps1
  38.  Author: Jan Egil Ring
  39.  Website: http://blog.powershell.no
  40.  
  41.  Usage:
  42.  1) Find and browse to the category you want to download files from. Available categories: http://technet.microsoft.com/en-us/edge/ff701756
  43.  2) Find the RSS URL by clicking the RSS-icon next to the category title on the top of the website
  44.  3) Specify the URL on the URL-parameter: .\Get-WebVideoFile.ps1 -RssUrl "http://technet.microsoft.com/en-us/edge/Syndication..."
  45.  
  46.  You have a royalty-free right to use, modify, reproduce, and
  47.  distribute this script file in any way you find useful, provided that
  48.  you agree that the creator, owner above has no warranty, obligations,
  49.  or liability for such use.
  50.  
  51.  VERSION HISTORY:
  52.  1.0 05.05.2012 - Initial release
  53.  
  54. #Requires -Version 2.0
  55. #>
  56.  
  57.  
  58. Param(
  59.        [Parameter(Mandatory=$true)]
  60.            [string]$RssUrl,
  61.        [string]$Destination = "$home\Videos\",
  62.        [switch]$UseOriginalFileName,
  63.        [regex]$UrlRegex = "(?<url>http://content\d.catalog.video.msn.com/../../[0-f]{8}-[0-f]{4}-[0-f]{4}-[0-f]{4}-[0-f]{12}(?<file>[^>]*?wmv))"
  64. )
  65.  
  66.  
  67. # Get-WebFile function from http://poshcode.org/3219
  68. function Get-WebFile {
  69.    param(
  70.       $url = (Read-Host "The URL to download"),
  71.       $fileName = $null,
  72.       [switch]$Passthru,
  73.       [switch]$quiet
  74.    )
  75.    
  76.    if($url.contains("http"))
  77.    {
  78.    $req = [System.Net.HttpWebRequest]::Create($url);
  79.    }
  80.    else
  81.    {
  82.    $URL_Format_Error = [string]"Connection protocol not specified. Recommended action: Try again using protocol (for example 'http://" + $url + "') instead. Function aborting...";
  83.    Write-Error $URL_Format_Error;
  84.    return;
  85.    }
  86.    
  87.    $req.CookieContainer = New-Object System.Net.CookieContainer
  88.  
  89.    try{
  90.    $res = $req.GetResponse();
  91.    }
  92.    catch
  93.    {
  94.    Write-Error $error[0].Exception.InnerException.Message;
  95.    return;
  96.    }
  97.  
  98.    if($fileName -and !(Split-Path $fileName)) {
  99.       $fileName = Join-Path (Get-Location -PSProvider "FileSystem") $fileName
  100.    }
  101.    elseif((!$Passthru -and ($fileName -eq $null)) -or (($fileName -ne $null) -and (Test-Path -PathType "Container" $fileName)))
  102.    {
  103.       [string]$fileName = ([regex]'(?i)filename=(.*)$').Match( $res.Headers["Content-Disposition"] ).Groups[1].Value
  104.       $fileName = $fileName.trim("\/""'")
  105.       if(!$fileName) {
  106.          $fileName = $res.ResponseUri.Segments[-1]
  107.          $fileName = $fileName.trim("\/")
  108.          if(!$fileName) {
  109.             $fileName = Read-Host "Please provide a file name"
  110.          }
  111.          $fileName = $fileName.trim("\/")
  112.          if(!([IO.FileInfo]$fileName).Extension) {
  113.             $fileName = $fileName + "." + $res.ContentType.Split(";")[0].Split("/")[1]
  114.          }
  115.       }
  116.       $fileName = Join-Path (Get-Location -PSProvider "FileSystem") $fileName
  117.    }
  118.    if($Passthru) {
  119.       $encoding = [System.Text.Encoding]::GetEncoding( $res.CharacterSet )
  120.       [string]$output = ""
  121.    }
  122.  
  123.    if($res.StatusCode -eq 200) {
  124.       [long]$goal = $res.ContentLength
  125.       $reader = $res.GetResponseStream()
  126.       if($fileName) {
  127.          try{
  128.          $writer = new-object System.IO.FileStream $fileName, "Create"
  129.          }
  130.          catch{
  131.          Write-Error $error[0].Exception.InnerException.Message;
  132.          return;
  133.          }
  134.       }
  135.       [byte[]]$buffer = new-object byte[] 4096
  136.       [long]$total = [long]$count = 0
  137.       do
  138.       {
  139.          $count = $reader.Read($buffer, 0, $buffer.Length);
  140.          if($fileName) {
  141.             $writer.Write($buffer, 0, $count);
  142.          }
  143.          if($Passthru){
  144.             $output += $encoding.GetString($buffer,0,$count)
  145.          } elseif(!$quiet) {
  146.             $total += $count
  147.             if($goal -gt 0) {
  148.                Write-Progress "Downloading $url" "Saving $total of $goal" -id 0 -percentComplete (($total/$goal)*100)
  149.             } else {
  150.                Write-Progress "Downloading $url" "Saving $total bytes..." -id 0
  151.             }
  152.          }
  153.       } while ($count -gt 0)
  154.      
  155.       $reader.Close()
  156.       if($fileName) {
  157.          $writer.Flush()
  158.          $writer.Close()
  159.       }
  160.       if($Passthru){
  161.          $output
  162.       }
  163.    }
  164.    $res.Close();
  165.    if($fileName) {
  166.       ls $fileName
  167.    }
  168. }
  169.  
  170.  
  171. $wc = New-Object net.webclient
  172. [xml]$xml = $wc.DownloadString($rssurl)
  173. $itemcount = $xml.rss.channel.item.count
  174. $count = 0
  175.  
  176. $xml.rss.channel.item | foreach {
  177.  
  178. $count ++
  179.  
  180. Write-Verbose "Processing RSS item $count of $itemcount : $($_.title)"
  181.  
  182. $string = $wc.DownloadString($_.link)
  183.  
  184.     if ($string -match $urlregex) {
  185.    
  186.     Write-Verbose "URL regex matched"
  187.    
  188.         $url = $matches.url        
  189.     }
  190.     else {
  191.    
  192.     Write-Verbose "URL regex did not match"
  193.    
  194.     return
  195.  
  196.     }
  197.  
  198.     if ($UseOriginalFileName) {
  199.  
  200.          $file = $url.split("/")[-1]
  201.  
  202.          }
  203.      
  204.       else {
  205.  
  206.          $file = $_.Title
  207.  
  208.          # Remove illegal filename characters / ? * : ; { } \ |
  209.          foreach ($character in ('/','?','*',':',';','{','}','\','|')) {
  210.          $file = $file.Replace($character,'')
  211.          }
  212.  
  213.          $file = $file + '.' + $url.split(".")[-1]
  214.          }
  215.  
  216. if ($url) {
  217.  
  218. $filepath =  "$destination$file"
  219.         if (Test-Path $filepath)
  220.         {Write-Verbose "$file is already present"}
  221.         else {
  222.             Write-Verbose "Downloading $file"
  223.  
  224.  
  225. Get-WebFile -url $url -filename $filepath
  226.  
  227.  
  228.         }    
  229.     }
  230. }

Submit a correction or amendment below (
click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.

Syntax highlighting:


Remember me