PoshCode Logo PowerShell Code Repository

Select-Xml 2.2 (modification of post by Joel Bennett view diff)
View followups from Joel Bennett | diff | embed code: <script type="text/javascript" src="http://PoshCode.org/embed/1504"></script>download | new post

Improves over the built-in Select-XML by leveraging Remove-XmlNamespace to provide a -RemoveNamespace parameter — if it’s supplied, all of the namespace declarations and prefixes are removed from all XML nodes (by an XSL transform) before searching. Note that this means that the returned results will not have namespaces in them, even if the input XML did.

Also, only raw XmlNodes are returned from this function, so the output isn’t currently compatible with the built in Select-Xml, but is equivalent to using Select-Xml … | Select-Object -Expand Node

  1. #requires -version 2.0
  2. # Select-Xml 2.2 and Remove-XmlNamespace
  3. # Version History:
  4. # Select-Xml 2.0 was the first script version I wrote, and it didn't function identically to the built-in Select-Xml with regard to parameter parsing
  5. # Select-Xml 2.1 matched the built-in Select-Xml parameter sets, it's now a drop-in replacement if you were using the original with: Select-Xml ... | Select-Object -Expand Node
  6. # Select-Xml 2.2 fixes a bug in the -Content parameterset where -RemoveNamespace was *presumed*
  7.  
  8.  
  9. function Select-Xml {
  10. #.Synopsis
  11. #  The Select-XML cmdlet lets you use XPath queries to search for text in XML strings and documents. Enter an XPath query, and use the Content, Path, or Xml parameter to specify the XML to be searched.
  12. #.Description
  13. #  Improves over the built-in Select-XML by leveraging Remove-XmlNamespace to provide a -RemoveNamespace parameter -- if it's supplied, all of the namespace declarations and prefixes are removed from all XML nodes (by an XSL transform) before searching.  
  14. #  
  15. #  However, only raw XmlNodes are returned from this function, so the output isn't currently compatible with the built in Select-Xml, but is equivalent to using Select-Xml ... | Select-Object -Expand Node
  16. #
  17. #  Also note that if the -RemoveNamespace switch is supplied the returned results *will not* have namespaces in them, even if the input XML did, and entities get expanded automatically.
  18. #.Parameter Content
  19. #  Specifies a string that contains the XML to search. You can also pipe strings to Select-XML. All strings received are joined with newlines and parsed as a single document once input has ended.
  20. #.Parameter Namespace
  21. #   Specifies a hash table of the namespaces used in the XML. Use the format @{<namespaceName> = <namespaceUri>}. The keys are the prefixes that the XPath query may use.
  22. #.Parameter Path
  23. #   Specifies the path and file names of the XML files to search.  Wildcards are permitted.
  24. #.Parameter Xml
  25. #  Specifies one or more XML nodes to search.
  26. #.Parameter XPath
  27. #  Specifies an XPath search query. The query language is case-sensitive. This parameter is required.
  28. #.Parameter RemoveNamespace
  29. #  Allows the execution of XPath queries without namespace qualifiers.
  30. #  
  31. #  If you specify the -RemoveNamespace switch, all namespace declarations and prefixes are actually removed from the Xml before the XPath search query is evaluated, and your XPath query should therefore NOT contain any namespace prefixes.
  32. #
  33. #  Note that this means that the returned results *will not* have namespaces in them, even if the input XML did, and entities get expanded automatically.
  34. [CmdletBinding(DefaultParameterSetName="Xml")]
  35. PARAM(
  36.    [Parameter(Position=1,ParameterSetName="Path",Mandatory=$true,ValueFromPipelineByPropertyName=$true)]
  37.    [ValidateNotNullOrEmpty()]
  38.    [Alias("PSPath")]
  39.    [String[]]$Path
  40. ,
  41.    [Parameter(Position=1,ParameterSetName="Xml",Mandatory=$true,ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
  42.    [ValidateNotNullOrEmpty()]
  43.    [Alias("Node")]
  44.    [System.Xml.XmlNode[]]$Xml
  45. ,
  46.    [Parameter(ParameterSetName="Content",Mandatory=$true,ValueFromPipeline=$true)]
  47.    [ValidateNotNullOrEmpty()]
  48.    [String[]]$Content
  49. ,
  50.    [Parameter(Position=0,Mandatory=$true,ValueFromPipeline=$false)]
  51.    [ValidateNotNullOrEmpty()]
  52.    [Alias("Query")]
  53.    [String[]]$XPath
  54. ,
  55.    [Parameter(Mandatory=$false)]
  56.    [ValidateNotNullOrEmpty()]
  57.    [Hashtable]$Namespace
  58. ,
  59.    [Switch]$RemoveNamespace
  60. )
  61. BEGIN {
  62.    # Select-Node: runs every XPath query against one node. $NamespaceManager is $null or a prefix->URI Hashtable.
  63.    function Select-Node {
  64.    PARAM([Xml.XmlNode]$Xml, [String[]]$XPath, $NamespaceManager)
  65.       $nsManager = $NamespaceManager   # $null (or any non-Hashtable value) is handed to SelectNodes unchanged
  66.       if($NamespaceManager -is [Hashtable]) {
  67.          # Only XmlDocument has a NameTable of its own; any other node borrows its owner document's.
  68.          $doc = if($Xml -is [System.Xml.XmlDocument]) { $Xml } else { $Xml.OwnerDocument }
  69.          $nsManager = New-Object System.Xml.XmlNamespaceManager $doc.NameTable
  70.          foreach($ns in $NamespaceManager.GetEnumerator()) {
  71.             $nsManager.AddNamespace( $ns.Key, $ns.Value )
  72.          }
  73.       }
  74.       foreach($query in $XPath) { $Xml.SelectNodes($query, $nsManager) }   # pass the built manager, never the raw Hashtable
  75.    }
  76.  
  77.    [Text.StringBuilder]$XmlContent = [String]::Empty   # collects -Content strings until END
  78. }
  79.  
  80. PROCESS {
  81.    $NSM = $Null; if($PSBoundParameters.ContainsKey("Namespace")) { $NSM = $Namespace }
  82.  
  83.    switch($PSCmdlet.ParameterSetName) {
  84.       "Content" {
  85.          $null = $XmlContent.AppendLine( $Content -Join "`n" )   # only buffered here; parsed and searched in END
  86.       }
  87.       "Path" {
  88.          foreach($file in Get-ChildItem $Path) {
  89.             [Xml]$Xml = Get-Content -LiteralPath $file.PSPath   # PSPath is fully qualified; a stringified FileInfo may be a bare file name
  90.             if($RemoveNamespace) {
  91.                $Xml = Remove-XmlNamespace $Xml
  92.             }
  93.             Select-Node $Xml $XPath  $NSM
  94.          }
  95.       }
  96.       "Xml" {
  97.          foreach($node in $Xml) {
  98.             if($RemoveNamespace) {
  99.                $node = Remove-XmlNamespace $node   # NOTE(review): Remove-XmlNamespace declares -Xml as XmlDocument[]; non-document nodes presumably fail to bind - confirm
  100.             }
  101.             Select-Node $node $XPath $NSM
  102.          }
  103.       }
  104.    }
  105. }
  106. END {
  107.    if($PSCmdlet.ParameterSetName -eq "Content") {
  108.       [Xml]$Xml = $XmlContent.ToString()   # everything buffered in PROCESS becomes one document
  109.       if($RemoveNamespace) {
  110.          $Xml = Remove-XmlNamespace $Xml
  111.       }
  112.       Select-Node $Xml $XPath  $NSM
  113.    }
  114. }
  115.  
  116. }
  117.  
  118.  
  119.  
  120.  
  121. function Remove-XmlNamespace {
  122. #.Synopsis
  123. #  Removes namespace definitions and prefixes from xml documents
  124. #.Description
  125. #  Runs an xml document through an XSL Transformation to remove namespaces from it if they exist.
  126. #  Entities are also naturally expanded
  127. #.Parameter Content
  128. #  Specifies a string that contains the XML to transform. Multiple strings are joined with newlines and parsed as one document.
  129. #.Parameter Path
  130. #  Specifies the path and file names of the XML files to transform. Wildcards are permitted.
  131. #
  132. #  There will be one output document for each matching input file.
  133. #.Parameter Xml
  134. #  Specifies one or more XML documents to transform
  135. #
  136. [CmdletBinding(DefaultParameterSetName="Xml")]
  137. PARAM(
  138.    [Parameter(ParameterSetName="Content",Mandatory=$true)]
  139.    [String[]]$Content
  140. ,
  141.    [Parameter(Position=0,ParameterSetName="Path",Mandatory=$true,ValueFromPipelineByPropertyName=$true)]
  142.    [Alias("FullName")]
  143.    [String[]]$Path
  144. ,
  145.    [Parameter(Position=0,ParameterSetName="Xml",Mandatory=$true,ValueFromPipeline=$true)]
  146.    [Alias("IO","InputObject")]
  147.    [System.Xml.XmlDocument[]]$Xml
  148. )
  149. BEGIN {
  150.    $xslt = New-Object System.Xml.Xsl.XslCompiledTransform   # stylesheet is loaded once here and reused for every input in PROCESS
  151.    $xslt.Load(([System.Xml.XmlReader]::Create((New-Object System.IO.StringReader @"
  152. <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  153.   <xsl:output method="xml" indent="yes"/>
  154.   <xsl:template match="/|comment()|processing-instruction()">
  155.      <xsl:copy>
  156.         <xsl:apply-templates/>
  157.      </xsl:copy>
  158.   </xsl:template>
  159.  
  160.   <xsl:template match="*">
  161.      <xsl:element name="{local-name()}">
  162.         <xsl:apply-templates select="@*|node()"/>
  163.      </xsl:element>
  164.   </xsl:template>
  165.  
  166.   <xsl:template match="@*">
  167.      <xsl:attribute name="{local-name()}">
  168.         <xsl:value-of select="."/>
  169.      </xsl:attribute>
  170.   </xsl:template>
  171. </xsl:stylesheet>
  172. "@))))
  173. }
  174. PROCESS {
  175.    switch($PSCmdlet.ParameterSetName) {   # normalise every parameter set into $Xml, an array of documents
  176.       "Content" {
  177.          [System.Xml.XmlDocument[]]$Xml = @( [Xml]($Content -Join "`n") )
  178.       }
  179.       "Path" {
  180.          [System.Xml.XmlDocument[]]$Xml = @()
  181.          foreach($file in Get-ChildItem $Path) {
  182.             $Xml += [Xml](Get-Content -LiteralPath $file.PSPath)   # PSPath is fully qualified; a stringified FileInfo may be a bare file name
  183.          }
  184.       }
  185.       "Xml" {   # $Xml was bound directly; nothing to convert
  186.       }
  187.    }
  188.    foreach($document in $Xml) {   # deliberately not named $input, which is a PowerShell automatic variable
  189.       $Output = New-Object System.Xml.XmlDocument
  190.       $writer =$output.CreateNavigator().AppendChild()   # XmlWriter that appends the transform result into $Output
  191.       $xslt.Transform( $document.CreateNavigator(), $null, $writer )
  192.       $writer.Close() # $writer.Dispose()
  193.       Write-Output $output
  194.    }
  195. }
  196. }

Submit a correction or amendment below (
click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.

Syntax highlighting:


Remember me