I need to parse a XML file having the structure as follows: (I can't show the data as it is confidential)
<?xml version="1.0" encoding="UTF-8"?>
<GACDWBulkLoadInterface xsi:schemaLocation="http://www.example.org/GACDWSchema GACDWSchema.xsd" xmlns="http://www.example.org/GACDWSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<BLConfiguration>
<BLProperties>
<BLProperty>
<key>isEmpty</key>
<value xmlns:xsd="http://www.w3.org/2001/XMLSchema" xsi:type="xsd:boolean">true</value>
</BLProperty>
</BLProperties>
<version>api-ag-1.1.68-20210315-43</version>
</BLConfiguration>
<Customer>
<CustomerID></CustomerID>
<Customer_Name></Customer_Name>
<ISOCountryCode></ISOCountryCode>
<customerType></customerType>
<dataSourceProvider></dataSourceProvider>
<function></function>
<TopLevelCI>
<Contact Action="Create">
<Id>4</Id>
<artifactType></artifactType>
<contactType></contactType>
<email></email>
<firstName></firstName>
<lastName></lastName>
<userID></userID>
<ChildCI/>
</Contact>
<PhysicalComputerSystem Action="Create">
<Id>2</Id>
<artifactType></artifactType>
<assetLifeCycleState></assetLifeCycleState>
<category></category>
<ciLifeCycleState></ciLifeCycleState>
<custname></custname>
<description></description>
<discontinuedDate></discontinuedDate>
<machineType></machineType>
<managed></managed>
<manufacturer></manufacturer>
<model></model>
<serialNumber></serialNumber>
<virtualFlag></virtualFlag>
<ChildCI>
<PhysicalComputerSystemContact Action="Create">
<contactUID></contactUID>
</PhysicalComputerSystemContact>
<PhysicalComputerSystemLocation Action="Create">
<buildingID></buildingID>
<buildingName></buildingName>
<city></city>
<countryName></countryName>
<isoCountryCode></isoCountryCode>
<postalCode></postalCode>
<siteName></siteName>
<stateProvince></stateProvince>
<streetAddress></streetAddress>
</PhysicalComputerSystemLocation>
</ChildCI>
</PhysicalComputerSystem>
<OperatingSystem Action="Create">
<Id>1</Id>
<aliasHostname></aliasHostname>
<artifactType></artifactType>
<assetLifeCycleState></assetLifeCycleState>
<backupContr></backupContr>
<barSaId></barSaId>
<cacfCustomGroup></cacfCustomGroup>
<category></category>
<ciLifeCycleState></ciLifeCycleState>
<classification></classification>
<cpuCount></cpuCount>
<description></description>
<discontinuedDate></discontinuedDate>
<fqhn></fqhn>
<hostsystemID></hostsystemID>
<hostsystemType></hostsystemType>
<installationDate></installationDate>
<ipAddress></ipAddress>
<majorBusProc></majorBusProc>
<managed></managed>
<memory></memory>
<memoryUnit></memoryUnit>
<osModLevel></osModLevel>
<osName></osName>
<osBuildNumber></osBuildNumber>
<osProvider></osProvider>
<osRelease></osRelease>
<osVersion></osVersion>
<prodDate></prodDate>
<purgeDate></purgeDate>
<purpose></purpose>
<securityClass></securityClass>
<serverType></serverType>
<supportEnddate></supportEnddate>
<useAliasHostname></useAliasHostname>
<virtualFlag></virtualFlag>
<ChildCI>
<OperatingSystemContact Action="Create">
<contactUID></contactUID>
</OperatingSystemContact>
<OperatingSystemEnvironmentProfile Action="Create">
<authFlag></authFlag>
<authExceptDate></authExceptDate>
<authExceptReasonCode></authExceptReasonCode>
<authExceptText></authExceptText>
<disasterRecSys></disasterRecSys>
<domain></domain>
<extUsersMax></extUsersMax>
<hcAutoInterv></hcAutoInterv>
<hcExceptDate></hcExceptDate>
<hcExceptReasonCode></hcExceptReasonCode>
<hcExceptText></hcExceptText>
<hcUnit></hcUnit>
<healthchAutoFlag></healthchAutoFlag>
<highestClientDataClassification></highestClientDataClassification>
<highestDataClassification></highestDataClassification>
<highestIbmDataClassification></highestIbmDataClassification>
<ibmCbnInterval></ibmCbnInterval>
<ibmCbnRequired></ibmCbnRequired>
<ibmCbnType></ibmCbnType>
<ibmCbnUnit></ibmCbnUnit>
<ibmPrivIDUnit></ibmPrivIDUnit>
<ibmPrivType></ibmPrivType>
<ibmQevFlag></ibmQevFlag>
<ibmQevInterval></ibmQevInterval>
<ibmQevUnit></ibmQevUnit>
<ibmUsersMax></ibmUsersMax>
<internetAccFlag></internetAccFlag>
<personalData></personalData>
<privIDFlag></privIDFlag>
<privIDInterv></privIDInterv>
<regulatoryRequirements></regulatoryRequirements>
<remediationtimeHi></remediationtimeHi>
<remediationtimeLo></remediationtimeLo>
<remediationtimeMd></remediationtimeMd>
<rerFlag></rerFlag>
<rerInterv></rerInterv>
<rerType></rerType>
<rerUnit></rerUnit>
<scExceptDate></scExceptDate>
<scExceptReasonCode></scExceptReasonCode>
<scExceptText></scExceptText>
<secExemptionDesc></secExemptionDesc>
<secExemptionFlag></secExemptionFlag>
<securityPackage></securityPackage>
<securityPolicyCategory></securityPolicyCategory>
<securityPolicyOther></securityPolicyOther>
<securityPolicyType></securityPolicyType>
<serverPurpose></serverPurpose>
<sharedIDFlag></sharedIDFlag>
<shrdExceptDate></shrdExceptDate>
<shrdExceptReasonCode></shrdExceptReasonCode>
<shrdExceptText></shrdExceptText>
<vitalBusProcessYN></vitalBusProcessYN>
</OperatingSystemEnvironmentProfile>
<OperatingSystemIpAddress Action="Create">
<dotnotation></dotnotation>
<hostName></hostName>
<isPrimaryIpHost></isPrimaryIpHost>
<name></name>
<netMask></netMask>
<vsExceptDate></vsExceptDate>
<vsExceptReasonCode></vsExceptReasonCode>
<vsExceptText></vsExceptText>
<vulScanUnit></vulScanUnit>
<vulscanAutoFlag></vulscanAutoFlag>
<vulscanAutoInterval></vulscanAutoInterval>
</OperatingSystemIpAddress>
</ChildCI>
</OperatingSystem>
<Subsystem Action="Create">
<Id>3</Id>
<applSIDataType></applSIDataType>
<artifactType></artifactType>
<assetLifeCycleState></assetLifeCycleState>
<ciLifeCycleState></ciLifeCycleState>
<controlProductName></controlProductName>
<controlProductRelease></controlProductRelease>
<controlProductType></controlProductType>
<controlProductVendor></controlProductVendor>
<controlProductVersion></controlProductVersion>
<description></description>
<discontinuedDate></discontinuedDate>
<fqhn></fqhn>
<managed></managed>
<name></name>
<parentUID></parentUID>
<piFlag></piFlag>
<productModLevel></productModLevel>
<productName></productName>
<productPatchLevel></productPatchLevel>
<productRelease></productRelease>
<productVersion></productVersion>
<purgeDate></purgeDate>
<scanTime></scanTime>
<vendorName></vendorName>
<ChildCI>
<SubsystemContact Action="Create">
<contactUID></contactUID>
</SubsystemContact>
<SubsystemEnvironmentProfile Action="Create">
<authFlag></authFlag>
<authExceptDate></authExceptDate>
<authExceptReasonCode></authExceptReasonCode>
<authExceptText></authExceptText>
<disasterRecSys></disasterRecSys>
<hcAutoInterv></hcAutoInterv>
<hcExceptDate></hcExceptDate>
<hcExceptReasonCode></hcExceptReasonCode>
<hcExceptText></hcExceptText>
<hcUnit></hcUnit>
<healthchAutoFlag></healthchAutoFlag>
<ibmCbnInterval></ibmCbnInterval>
<ibmCbnRequired></ibmCbnRequired>
<ibmCbnType></ibmCbnType>
<ibmCbnUnit></ibmCbnUnit>
<ibmPrivIDUnit></ibmPrivIDUnit>
<ibmPrivType></ibmPrivType>
<ibmQevFlag></ibmQevFlag>
<ibmQevInterval></ibmQevInterval>
<ibmQevUnit></ibmQevUnit>
<privIDFlag></privIDFlag>
<privIDInterv></privIDInterv>
<rerFlag></rerFlag>
<rerInterv></rerInterv>
<rerType></rerType>
<rerUnit></rerUnit>
<scExceptDate></scExceptDate>
<scExceptReasonCode></scExceptReasonCode>
<scExceptText></scExceptText>
<sharedIDFlag></sharedIDFlag>
<shrdExceptDate></shrdExceptDate>
<shrdExceptReasonCode></shrdExceptReasonCode>
<shrdExceptText></shrdExceptText>
<vitalBusProcessYN></vitalBusProcessYN>
</SubsystemEnvironmentProfile>
</ChildCI>
</Subsystem>
</TopLevelCI>
<Relationship>
<RelationshipName>Hosted_by</RelationshipName>
<sourceCIType>OperatingSystem</sourceCIType>
<targetCIType>OperatingSystem</targetCIType>
</Relationship>
<Relationship>
<RelationshipName>Runs_on</RelationshipName>
<sourceCIType>Subsystem</sourceCIType>
<targetCIType>OperatingSystem</targetCIType>
</Relationship>
<Relationship>
<RelationshipName>Runs_on_hw</RelationshipName>
<sourceCIType>OperatingSystem</sourceCIType>
<targetCIType>PhysicalComputerSystem</targetCIType>
</Relationship>
</Customer>
</GACDWBulkLoadInterface>
I have researched and come up with my VBA code here
Sub ReadXML()
Dim oXML As MSXML2.DOMDocument60
Dim vaFile As Variant
Set oXML = New MSXML2.DOMDocument60
'Open Browse file dialog
vaFile = Application.GetOpenFilename("XML Files (*.xml), *.xml", _
Title:="Select XML files", MultiSelect:=False)
oXML.validateOnParse = True
oXML.setProperty "SelectionLanguage", "XPath" ' necessary in version 3.0, possibly redundant here
oXML.async = False
If Not oXML.Load(vaFile) Then 'Load XML has gone bad
Dim xPE As Object ' Set xPE = CreateObject("MSXML2.IXMLDOMParseError")
Dim strErrText As String
Set xPE = oXML.parseError
With xPE
strErrText = "Load error " & .ErrorCode & " xml file " & vbCrLf & _
Replace(.URL, "file:///", "") & vbCrLf & vbCrLf & _
xPE.reason & _
"Source Text: " & .srcText & vbCrLf & vbCrLf & _
"Line No.: " & .Line & vbCrLf & _
"Line Pos.: " & .linepos & vbCrLf & _
"File Pos.: " & .filepos & vbCrLf & vbCrLf
End With
MsgBox strErrText, vbExclamation
Set xPE = Nothing
Exit Sub
End If
Debug.Print "|" & oXML.XML & "|"
Dim nodeList As MSXML2.IXMLDOMNodeList, iNode As MSXML2.IXMLDOMNode
Dim Searched As String
Searched = "/*/*"
Set nodeList = oXML.SelectNodes(Searched)
'Set Queries = oXML.DocumentElement.SelectNodes(Searched)
For Each iNode In nodeList
'Debug.Print "<" & iNode.BaseName & ">"
Debug.Print "<" & iNode.nodeName & ">"
Next
However, My problem is no matter what I come up with Xpath queries. None of them work except \* or \.
I can't specific any path. Both .SelectSingleNode or .SelectNodes alway return length =0. Is there any problem with my code or my XML file or Xpath syntax?
As mentioned in comments your xml document has namespace definitions in its DocumentElement <GACDWBulkLoadInterface> (xmlns stands for xml name space). Furthermore "it contains a default namespace so any attempted parsing on named nodes must map to this namespace URI otherwise returns nothing."
To allow eventual analysis it's necessary to include a user defined prefix (e.g. :s) into explicit namespace settings, which can be used in later XPath expressions:
Dim oXML As MSXML2.DOMDocument60
Set oXML = New MSXML2.DOMDocument60
oXML.validateOnParse = True
Dim XMLNamespaces As String
XMLNamespaces = "xmlns:s='http://www.example.org/GACDWSchema'"
oXML.SetProperty "SelectionNamespaces", XMLNamespaces
Eventually you can define any XPath expression, e.g. the childnodes of Customer[1]:
Dim nodeList As MSXML2.IXMLDOMNodeList, iNode As MSXML2.IXMLDOMNode
Dim Searched As String
Searched = "//s:Customer/s:*"
Set nodeList = oXML.SelectNodes(Searched)
Related links
Displaying XML structures including attributes via recursive function calls
Display XML with hierarchy in cells
Apparently OP's source code: Parse DomDocument
Related
I read an XML document to collect user logon times.
<UserName="Jenny" Monday="7:00" Tuesday="7:30" Wednesday="0" Thursday="7:10" Friday="7:25" Saturday="6:00" Sunday="0"/><UserName="Simon" Monday="8:20" Tuesday="7:45" Wednesday="7:45" Thursday="7:10" Friday="7:25" Saturday="7:00" Sunday="0"/><UserName="Jenny" Monday="8:00" Tuesday="8:30" Wednesday="8:00" Thursday="7:10" Friday="7:25" Saturday="0" Sunday="0"/>Chris
I can get and paste info into a table on an Excel spreadsheet one by one.
Dim pTeamTimes As New XMLdoc
Dim objPlan As IXMLDOMElement
'build file path
strTeamXMLPath = "C:\Users\Public\Libraries\Times" & ".xml"
'load document
objPlan .LoadDocument strTeamXMLPath , "TeamTimes"
For Each objPlan In pTeamTimes.Root.ChildNodes
.Cells(intRow, 13) = User
.Cells(intRow, 14) = objPlan .getAttribute("Monday")
.Cells(intRow, 15) = objPlan .Attributes.getNamedItem("Tuesday").Text
.Cells(intRow, 16) = objPlan .Attributes.getNamedItem("Wednesday").Text
.Cells(intRow, 17) = objPlan .Attributes.getNamedItem("Thursday").Text
.Cells(intRow, 18) = objPlan .Attributes.getNamedItem("Friday").Text
.Cells(intRow, 19) = objPlan .Attributes.getNamedItem("Saturday").Text
.Cells(intRow, 20) = objPlan .Attributes.getNamedItem("Sunday").Text
intRow = intRow + 1
Next objPlan
This pastes the data into a table with the user name on the left and the times to the right row by row (see below).
User|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday
Is there a way to loop through the file and extract the appropriate data for the appropriate header?
I also played with .getAttribute("Monday") and .Attributes.getNamedItem("Tuesday").Text
Is there a major difference in using each of these methods?
Streamlining display of xml node attributes (array approach)
Your XML file snippet's syntax isn't correct. Each node has to show its node name after the opening bracket <SomeNode ...>, but this nodename cannot be followed immediately by an equals character = indicating always a following attribute assignment.
Therefore I built a wellformed xml structure to be able to illustrate a working MCV Example choosing to ("re")name the individual nodes <User ...> followed by UserName and weekday attributes (Monday="7:00" Tuesday="" ...).
<?xml version="1.0" encoding="UTF-8"?>
<AllUsers>
<Team>
<User UserName="Jenny" Monday="7:00" Tuesday="7:30" Wednesday="0" Thursday="7:10" Friday="7:25" Saturday="6:00" Sunday="0"/>
<User UserName="Simon" Monday="8:20" Tuesday="7:45" Wednesday="7:45" Thursday="7:10" Friday="7:25" Saturday="7:00" Sunday="0"/>
<User UserName="Jenny2" Monday="8:00" Tuesday="8:30" Wednesday="8:00" Thursday="7:10" Friday="7:25" Saturday="0" Sunday="0"/>
</Team>
</AllUsers>
Example code
Based on this assumed example structure (loaded here via .LoadXML), I demonstrate how to streamline code getting attributes via a NodeList loop based on the following xml content.
In order to allow a quickly reproducible example, I didn't refer to an external file via .Load, but to a pure string content (received by help function getContent()) loaded via .LoadXML. Of course loading an external file needs the following syntax: xDoc.Load strTeamXMLPath. - Btw several parts of the original code aren't clear, e.g. I don't know what's behind .LoadDocument
Sub GetAttributes()
Dim xdoc As MSXML2.DOMDocument60 ' early binding needs reference to Microsoft 'XML, v6.0'
Set xdoc = New MSXML2.DOMDocument60
If xdoc.LoadXML(getContent()) Then
Dim users As MSXML2.IXMLDOMNodeList
Set users = xdoc.SelectNodes("//Team/User") ' nodelist is zero-based
ReDim tmp(1 To users.Length, 1 To 8)
Dim i As Long
For i = 1 To users.Length
Dim j As Long
For j = 1 To users(i - 1).Attributes.Length
'assign to 1-based 2-dim tmp array
'(whereas users nodelist incl. node attributes list are zero-based!)
tmp(i, j) = users(i - 1).Attributes(j - 1).Text
'Debug.Print i & "." & j, tmp(i, j) ' optional display in VB Editor's immediate window
Next
Debug.Print i, Join(Application.Index(tmp, i, 0), "|")
Next i
Else ' XML Parse Error
Debug.Print getParseError(xdoc)
End If
' write tmp to any target
With Sheet1 ' << change to your project's sheet Code(Name)
' 'a) write captions starting from cell M1 (optional)
' For i = 1 To UBound(tmp, 2)
' .Range("M1").Offset(0, i - 1) = users(0).Attributes(i - 1).BaseName
' Next i
'write tmp results
.Range("M2").Resize(UBound(tmp, 1), UBound(tmp, 2)) = tmp
End With
End Sub
Additionaly you could test other syntax, e.g. for the 1st user (Jenny) to get the Monday attribute:
Debug.Print users(0).Attributes.getNamedItem("Monday").BaseName ' "Monday"
Debug.Print users(0).Attributes.getNamedItem("Monday").Text ' 7:00
Debug.Print xdoc.DocumentElement.SelectSingleNode("Team/User[1]/#Monday").Text ' 7:00
Help functions
Function getContent() gets an assumed minimal, but wellformed xml string content as described above replacing the unknown structure of OP's external file content.
Function getContent() As String
Dim tmp As String
tmp = "<?xml version=""1.0"" encoding=""UTF-8""?>" & vbNewLine
tmp = tmp & "<AllUsers>" & vbNewLine & " <Team>" & vbNewLine & _
vbTab & "<User UserName=""Jenny"" Monday=""7:00"" Tuesday=""7:30"" Wednesday=""0"" Thursday=""7:10"" Friday=""7:25"" Saturday=""6:00"" Sunday=""0""/>" & vbNewLine & _
vbTab & "<User UserName=""Simon"" Monday=""8:20"" Tuesday=""7:45"" Wednesday=""7:45"" Thursday=""7:10"" Friday=""7:25"" Saturday=""7:00"" Sunday=""0""/>" & vbNewLine & _
vbTab & "<User UserName=""Jenny2"" Monday=""8:00"" Tuesday=""8:30"" Wednesday=""8:00"" Thursday=""7:10"" Friday=""7:25"" Saturday=""0"" Sunday=""0""/>" & vbNewLine & _
" </Team>" & vbNewLine & "</AllUsers>"
getContent = tmp
Debug.Print getContent
End Function
Function getParseError(xdoc As MSXML2.DOMDocument60) As String
Dim xPE As MSXML2.IXMLDOMParseError
Set xPE = xdoc.parseError
With xPE
getParseError = "Load Error " & .ErrorCode & " XML File " & vbCrLf & _
Replace(.Url, "file:///", "") & vbCrLf & vbCrLf & _
xPE.reason & _
"Source Text: " & .srcText & vbCrLf & vbCrLf & _
"Line No: " & .Line & vbCrLf & _
"Line Pos: " & .linepos & vbCrLf & _
"File Pos: " & .filepos & vbCrLf & vbCrLf
End With
End Function
Trying to parse thorugh an XML file with VBA and print the results to an Excel spreadsheet. But whenever I get to creating a list with the following code it doesn't create one and I can't figure out why.
Set Songs = objDOM.SelectNodes("/channel/item")
Full code and XML is below
Function fnReadXMLByTags()
'Code From: https://excel-macro.tutorialhorizon.com/vba-excel-read-xml-by-looping-through-nodes/
'And Here: https://desmondoshiwambo.wordpress.com/2012/07/03/how-to-load-xml-from-a-local-file-with-msxml2-domdocument-6-0-and-loadxml-using-vba/
Dim mainWorkBook As Workbook
Set mainWorkBook = ActiveWorkbook
mainWorkBook.Sheets("XML_Parser").Range("A:C").Clear
Dim intFile As Integer
Dim strTemp As String
Dim strXML As String
Dim strOrderText As String
Dim objDOM As Object
Dim Songs As Object
XMLFileName = ThisWorkbook.Path & "\PlaylistFeed.xml"
'Open file
intFile = FreeFile
Open XMLFileName For Input As intFile
strXML = ""
'Load XML into string strXML
While Not EOF(intFile)
Line Input #intFile, strTemp
strXML = strXML & strTemp
Wend
Close intFile
'Load the XML into DOMDocument object
Set objDOM = CreateObject("MSXML2.DOMDocument.6.0")
objDOM.LoadXML strXML
mainWorkBook.Sheets("XML_Parser").Range("A1,B1,C1,D1").Interior.ColorIndex = 40
mainWorkBook.Sheets("XML_Parser").Range("A1,B1,C1,D1").Borders.Value = 1
mainWorkBook.Sheets("XML_Parser").Range("A" & 1).Value = "Song Number"
mainWorkBook.Sheets("XML_Parser").Range("B" & 1).Value = "Tag Number"
mainWorkBook.Sheets("XML_Parser").Range("C" & 1).Value = "Item Node"
mainWorkBook.Sheets("XML_Parser").Range("D" & 1).Value = "Value"
Set Songs = objDOM.SelectNodes("/channel/item")
mainWorkBook.Sheets("XML_Parser").Range("A" & 2).Value = Songs.Length 'always says 0
intCounter = 2
For i = 0 To Songs.Length - 1
For j = 0 To Songs(i).ChildNodes.Length - 1
mainWorkBook.Sheets("XML_Parser").Range("A" & intCounter).Value = i + 1
mainWorkBook.Sheets("XML_Parser").Range("B" & intCounter).Value = j + 1
mainWorkBook.Sheets("XML_Parser").Range("C" & intCounter).Value = Songs(i).ChildNodes(j).tagname
mainWorkBook.Sheets("XML_Parser").Range("D" & intCounter).Value = Songs(i).ChildNodes(j).Text
mainWorkBook.Sheets("XML_Parser").Range("A" & intCounter).Borders.Value = 1
mainWorkBook.Sheets("XML_Parser").Range("B" & intCounter).Borders.Value = 1
mainWorkBook.Sheets("XML_Parser").Range("C" & intCounter).Borders.Value = 1
mainWorkBook.Sheets("XML_Parser").Range("D" & intCounter).Borders.Value = 1
intCounter = intCounter + 1
mainWorkBook.Sheets("XML_Parser").Range("A" & intCounter, "B" & intCounter, "C" & intCounter, "D" & intCounter).Interior.ColorIndex = 40
mainWorkBook.Sheets("XML_Parser").Range("A" & intCounter, "B" & intCounter, "C" & intCounter, "D" & intCounter).Borders.Value = 1
mainWorkBook.Sheets("XML_Parser").Range("A" & intCounter).Value = "Song Number"
mainWorkBook.Sheets("XML_Parser").Range("B" & intCounter).Value = "Tag Number"
mainWorkBook.Sheets("XML_Parser").Range("C" & intCounter).Value = "Item Node"
mainWorkBook.Sheets("XML_Parser").Range("D" & intCounter).Value = "Value"
Next
intCounter = intCounter + 1
Next
End Function
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
<channel>
<title>WTMD Playlist</title>
<link>http://www.wtmd.org</link>
<description>Recently Played Songs at WTMD</description>
<item>
<title>Son Volt - Drown</title>
<link>
http://www.amazon.com/exec/obidos/external-search?tag=wt897fmrafomu-20&index=digital-music-track&keyword=Son+Volt%2BDrown
</link>
<description>Album: Trace</description>
<pubDate>Mon, 03 Dec 2012 20:09:44 GMT</pubDate>
<guid>
http://www.amazon.com/exec/obidos/external-search?tag=wt897fmrafomu-20&index=digital-music-track&keyword=Son+Volt%2BDrown
</guid>
<dc:date>2012-12-03T20:09:44Z</dc:date>
</item>
<item>
<title>Allen Stone - Sleep</title>
<link>
http://www.amazon.com/exec/obidos/external-search?tag=wt897fmrafomu-20&index=digital-music-track&keyword=Allen+Stone%2BSleep
</link>
<description>Album: Allen Stone</description>
<pubDate>Mon, 03 Dec 2012 20:07:19 GMT</pubDate>
<guid>
http://www.amazon.com/exec/obidos/external-search?tag=wt897fmrafomu-20&index=digital-music-track&keyword=Allen+Stone%2BSleep
</guid>
<dc:date>2012-12-03T20:07:19Z</dc:date>
</item>
When testing the length the Songs list always comes at zero and I can't figure out why. Everything I've tried either has the same problem or gives an error.
As QHarr pointed out, in parts of the XML document, you have text containing the & symbol (ampersand), for example:
tag=wt897fmrafomu-20&index=digital-music-track
The & symbol on its own is not allowed as part of well formed XML and needs to be replaced by the entity reference &:
tag=wt897fmrafomu-20&index=digital-music-track
The XPath query in the call to selectNodes is also incorrect. You have:
Set Songs = objDOM.SelectNodes("/channel/item")
That would only be correct if "channel" was the root node of the XML document. In fact, "rss" is the root node so you would need to use this instead:
Set Songs = objDOM.SelectNodes("/rss/channel/item")
The calls to set the color index and borders are incorrect. You should have this instead:
mainWorkBook.Sheets("XML_Parser").Range("A" & intCounter & ":D" & intCounter).Interior.ColorIndex = 40
mainWorkBook.Sheets("XML_Parser").Range("A" & intCounter & ":D" & intCounter).Borders.Value = 1
The XML document as shown in the question is incomplete so I'm assuming that the remaining tags were added to close the "channel" and "rss" tags. I'm also assuming that an XML declaration was added at the start:
<?xml version="1.0" encoding="utf-8"?>
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
<channel>
<title>WTMD Playlist</title>
<link>http://www.wtmd.org</link>
<description>Recently Played Songs at WTMD</description>
<item>
<title>Son Volt - Drown</title>
<link>
http://www.amazon.com/exec/obidos/external-search?tag=wt897fmrafomu-20&index=digital-music-track&keyword=Son+Volt%2BDrown
</link>
<description>Album: Trace</description>
<pubDate>Mon, 03 Dec 2012 20:09:44 GMT</pubDate>
<guid>
http://www.amazon.com/exec/obidos/external-search?tag=wt897fmrafomu-20&index=digital-music-track&keyword=Son+Volt%2BDrown
</guid>
<dc:date>2012-12-03T20:09:44Z</dc:date>
</item>
<item>
<title>Allen Stone - Sleep</title>
<link>
http://www.amazon.com/exec/obidos/external-search?tag=wt897fmrafomu-20&index=digital-music-track&keyword=Allen+Stone%2BSleep
</link>
<description>Album: Allen Stone</description>
<pubDate>Mon, 03 Dec 2012 20:07:19 GMT</pubDate>
<guid>
http://www.amazon.com/exec/obidos/external-search?tag=wt897fmrafomu-20&index=digital-music-track&keyword=Allen+Stone%2BSleep
</guid>
<dc:date>2012-12-03T20:07:19Z</dc:date>
</item>
</channel>
</rss>
I need to parse hundreds of XML files having all the same structure as follows:
<?xml version="1.0" encoding="UTF-8"?>
<Concepts>
<ConceptModel name="food">
<Filters>
<Filter type="CC"/>
</Filters>
<Queries>
<Query lang="EN">(cheese, bread, wine)</Query>
<Query lang="DE">(Käse, Brot, Wein)</Query>
<Query lang="FR">(fromaige, pain, vin)</Query>
</Queries>
</ConceptModel>
</Concepts>
I have read several articles and posts in internet like below but I could not come up with a solution:
Excel vba Parse Complex XML
Parse XML File with VBA
So far I am doing:
Dim oXml As MSXML2.DOMDocument
Set oXml = New MSXML2.DOMDocument
oXml.LoadXML ("C:\folder\folder\name.xml")
Dim Queries As IXMLDOMNodeList
Dim Query As IXMLDOMNode
ThisWorkbook.Sheets(3).Cells(i, 1) = "before loop"
Set Queries = oXml.SelectNodes("/concepts/Queries")
MsgBox "how many Queries " & Queries.Length
For Each Query In Queries
ThisWorkbook.Sheets(3).Cells(i, 1) = "Works"
ThisWorkbook.Sheets(3).Cells(i, 2) = Query.SelectNodes("Query").iTem(0).Text
i = i + 1
Next
This code seems to be understood by VBA but it does not read the contents. The loop does not get read, meaning (I guess) that Queries is not looped at all. Which is confirmed by the fact that the Msgbox "how many queries" gives 0 as result. But actually there are three queries. Could someone give me a hand?
As second issue I would like to ask if
Dim oXml As MSXML2.DOMDocument
would be the same as
Dim oXml As MSXML2.DOMDocument60
Since I checked in tools/references "Microsof XML, v6.0"
I thought that the queries having a tag
might cause a problem. and I added the follwoing lines:
Dim childs As IXMLDOMNodeList
Set childs = oXml.SelectNodes("/concepts")
MsgBox "childs " & childs.Length
which also gives 0 as result. I would expect 3, since concepts has three children, namely ConceptModel, Filter and Queries. So, I am even more puzzled.
As close as possible to your OP
I 'd draw your attention to several errors or misunderstandings:
[1] Invalid .LoadXML Syntax
What is then the difference between .LoadXML ("C:\folder\folder\name.xml") and .Load ("C:\folder\folder\name.xml") ?
Load expects a file path and then loads the file content into the oXML object.
LoadXML doesn't expect a file parameter, but its actual XML text content that has to be a well formed string.
[2] XML distinguishes between lower and upper case, therefore nodes need to be addressed by their exact literal names:
the <Query> node wouldn't be identified by "query", "ConceptModel" isn't the same as "conceptmodel".
As second issue I would like to ask if
Dim oXml As MSXML2.DOMDocument would be the same as
Dim oXml As MSXML2.DOMDocument60,
since I checked in tools/references "Microsof XML, v6.0"?
No, it isn't. - Please note that the former declaration would load version 3.0 by default.
However it's absolutely preferrable to get the version 6.0 (any other versions are obsolete nowadays!)
As you are using so called early binding (referencing "Microsoft XML, v6.0"), I'll do the same but am referring to the current version 6.0:
Dim oXml As MSXML2.DOMDocument60 ' declare the xml doc object
Set oXml = New MSXML2.DOMDocument60 ' set an instance of it to memory
[3] misunderstanding some XPath expressions
A starting slash "/" in the XPath expression always refers to the DocumentElement (<Concepts> here),
you can add .DocumentElement to your document object instead. A starting double slash "//xyz" would find any "xyz" node if existant.
For instance
oXml.SelectNodes("//Query").Length
returns the same childNodes number (here: 3) as
oXml.DocumentElement.SelectNodes("//Query").Length ' or
oXml.SelectSingleNode("//Queries").ChildNodes.Length ' or even
oXml.SelectNodes("/*/*/*/Query").Length`.
Code example with reference to XML version 6.0
Of course you'd have to loop over several xml files, the example only uses one (starting in row 2).
Just for the case of not well formed xml files I added a detailled error Routine that enables you to identify the presumed error location. Load and LoadXML both return a boolean value (True if loaded correctly, False if not).
Sub xmlTest()
Dim ws As Worksheet: Set ws = ThisWorkbook.Sheets(3)
Dim oXml As MSXML2.DOMDocument60
Set oXml = New MSXML2.DOMDocument60
With oXml
.validateOnParse = True
.setProperty "SelectionLanguage", "XPath" ' necessary in version 3.0, possibly redundant here
.async = False
If Not .Load(ThisWorkbook.Path & "\xml\" & "name.xml") Then
Dim xPE As Object ' Set xPE = CreateObject("MSXML2.IXMLDOMParseError")
Dim strErrText As String
Set xPE = .parseError
With xPE
strErrText = "Load error " & .ErrorCode & " xml file " & vbCrLf & _
Replace(.URL, "file:///", "") & vbCrLf & vbCrLf & _
xPE.reason & _
"Source Text: " & .srcText & vbCrLf & vbCrLf & _
"Line No.: " & .Line & vbCrLf & _
"Line Pos.: " & .linepos & vbCrLf & _
"File Pos.: " & .filepos & vbCrLf & vbCrLf
End With
MsgBox strErrText, vbExclamation
Set xPE = Nothing
Exit Sub
End If
' Debug.Print "|" & oXml.XML & "|"
Dim Queries As IXMLDOMNodeList, Query As IXMLDOMNode
Dim Searched As String
Dim i&, ii&
i = 2 ' start row
' start XPath
Searched = "ConceptModel/Queries/Query" ' search string
Set Queries = oXml.DocumentElement.SelectNodes(Searched) ' XPath
'
ws.Cells(i, 1) = IIf(Queries.Length = 0, "No items", Queries.Length & " items")
ii = 1
For Each Query In Queries
ii = ii + 1
ws.Cells(i, ii) = Query.Text
Next
End With
End Sub
Additional hints
You also might be interested in an example how to list all child nodes via XMLDOM and to obtain attribute names from XML using VBA.
I include a further hint due to later comment (thanks to #barrowc )
"A further issue with using MSXML, v3.0 is that the default selection language is XSLPatterns instead of XPath.
Details on some of the differences between MSXML versions are here
and the differences between the two selection languages are discussed here."
In the current MSXML2 version 6.0 XPath 1.0 is fully supported. So it seems XSL Patterns have been implemented by Microsoft in earlier days, basically it can be regarded as a simplified subset of XPath expressions before W3C standardisation of XPath.
MSXML2 Version 3.0 allows the integration of XPath 1.0 at least by explicit selection language setting:
oXML.setProperty "SelectionLanguage", "XPath" ' oXML being the DOMDocument object as used in original post
It is the special characters (german alphabet) meaning you need to do something like a batch replace on the XML files so opening line is not this:
<?xml version="1.0" encoding="UTF-8"?>
but this:
<?xml version="1.0" encoding="iso-8859-1" ?>
Code to test with after:
Option Explicit
Public Sub test()
Dim xmlDoc As Object
Set xmlDoc = CreateObject("MSXML2.DOMDocument") 'New MSXML2.DOMDocument60
With xmlDoc
.validateOnParse = True
.setProperty "SelectionLanguage", "XPath"
.async = False
If Not .Load("C:\Users\User\Desktop\Test.xml") Then
Err.Raise .parseError.ErrorCode, , .parseError.reason
End If
End With
Debug.Print xmlDoc.SelectNodes("//Query").Length
End Sub
This is the XML I am using:
<?xml version="1.0" encoding="iso-8859-1" ?>
<Concepts>
<ConceptModel name="food">
<Filters>
<Filter type="CC"/>
</Filters>
<Queries>
<Query lang="EN">(cheese, bread, wine)</Query>
<Query lang="DE">(Käse, Brot, Wein)</Query>
<Query lang="FR">(fromaige, pain, vin)</Query>
</Queries>
</ConceptModel>
</Concepts>
I have some work to complete where I have 9 tabs of data (some of which contain thousands of lines of data). Each tab contains (amongst others) a policy number, a credit and/or a debit number.
Every policy number will have a match somewhere in the tabs containing an equal credit or debit, e.g.
tab 1 will have Policy number 123 and a credit of £100 and
tab 5 will also have policy number 123 with a debit of £100.
What I'm looking to do is, look through each policy number on every tab and find where the opposite amount is located adding the location address to each policy number.
I'm certainly not looking for anyone to create the coding for me, but what I am looking for is advice. I've looked at using loops but feel this may take a very long time to process. I've also looked at Dictionaries but am relatively new to these so am not very confident.
Is what I'm looking for even possible? And if so any ideas where to start or pointers? Any advice is greatly appreciated. Thanks!
Usage Example
#Matt555, You can test the created XML file with the following code to get the sheet names of policy "123" and debit of 100. I tested the code assuming your titles in row A:A contain "policy" and "debit"
#Peh, You are right, xml dom methods aren't used too often within vba. The advantage of using XML in this connex is a great flexibility in searching via XPath as well as performance over huge files. I prefer it even to arrays or dictionaries when filtering unique values. It is possible to return the found item number in node lists without looping through the whole data set ...
Option Explicit
Sub testPolicy()
Dim policy
Dim debit As Double
policy = "123"
debit = "100"
MsgBox "Policy " & policy & " found in " & vbNewLine & _
findSheetName(policy, debit), vbInformation, "Policy " & policy & " in Tabs"
' You can easily split this to an array and analyze the results
End Sub
Function findSheetName(ByVal policy, Optional ByVal debit) As String
' Purpose: Finds Sheet Names where policy AND/OR debit is found
' Note: Assuming your titles in row A:A contain "policy" and "debit"
' You can declare xDoc also after Option Explicit to make it public
Dim xDoc As Object
Dim xNd As Object ' MSXML.IXMDOMNode
Dim xNdList As Object ' MSXML.IXMLDOMNodeList
Dim s As String
' XPath expression
Dim xPth As String
If IsMissing(debit) Then
xPth = "//row[policy=""" & policy & """]"
Else
xPth = "//row[policy=""" & policy & """][debit=""" & debit & """]"
End If
' XML to memory
Set xDoc = CreateObject("MSXML2.Domdocument.6.0")
' allow XPath
xDoc.setProperty "SelectionLanguage", "XPath"
xDoc.validateOnParse = False
' ========
' LOAD XML
' ========
xDoc.Load ThisWorkbook.Path & "\" & "output.xml"
' Loop thru NodeList
Set xNdList = xDoc.DocumentElement.SelectNodes(xPth)
Debug.Print xPth, xNdList.Length
For Each xNd In xNdList
s = s & xNd.ParentNode.NodeName & "|"
Next xNd
Set xDoc = Nothing
findSheetName = s
End Function
You could
a) create an XML file looping through all sheets,
b) open it via load method and
c) perform a simple XPath search (I can give some examples later)
I modified a recent answer (cf. excel-vba-xml-parsing-performance)
to do step "a)" using late binding thus
a) avoiding a reference to the latest MS XML Version Version 6 (msxml6.dll) and
b) getting data over all xheets. XML allows you structured search via XPath over nodes in a logical structure comparable to HTML. The root node in this example is called data, the following nodes are named with the sheets' names and the subsequent nodes get the names in row A:A of each sheet.
A XML file is a simple text file, which you can open by a text editor. Above all you can use VBA XMLDOM methods to analyze or search through the items (nodes). I will give you examples to relating to your question, but give me some time. => see answer "Usage Example", where I explain some Advantages of XML, too (#Peh).
Please pay Attention to the added notes, too.
Option Explicit
Sub xmlExportSheets()
' Zweck: XML Export over all sheets in workbook
' cf. Site: [excel-vba-xml-parsing-performance][1][https://stackoverflow.com/questions/40986395/excel-vba-xml-parsing-performance/40987237#40987237][1]
' Note: pretty printed raw output with line breaks and indentation using an embedded XSLT stylesheet
On Error GoTo ErrHandle
' A. Declarations
' 1 DECLARE XML DOC OBJECT '
' a) Early Binding: VBA REFERENCE MSXML, v6.0 necessary'
' Dim doc As New MSXML2.DOMDocument60, xslDoc As New MSXML2.DOMDocument60, newDoc As New MSXML2.DOMDocument60
' Dim root As IXMLDOMElement, dataNode As IXMLDOMElement, datesNode As IXMLDOMElement, namesNode As IXMLDOMElement
' b) Late Binding XML Files:
Dim doc As Object
Dim xslDoc As Object
Dim newDoc As Object
' c) Late Binding XML Nodes:
Dim root As Object
Dim sh As Object ' xml node containing Sheet Name
Dim dataNode As Object
Dim datesNode As Object
Dim namesnode As Object
' 2 DECLARE other variables
Dim i As Long
Dim j As Long
Dim tmpValue As Variant
Dim tit As String
Dim ws As Worksheet
' B. XML Docs to Memory
Set doc = CreateObject("MSXML2.Domdocument.6.0")
Set xslDoc = CreateObject("MSXML2.Domdocument.6.0")
Set newDoc = CreateObject("MSXML2.Domdocument.6.0")
' C. Set DocumentElement (= root node)'
Set root = doc.createElement("data")
' D. Create Root Node
doc.appendChild root
' ===========================
' ITERATE THROUGH Sheets
' ===========================
For Each ws In ThisWorkbook.Sheets
Set sh = doc.createElement(ws.Name) '
root.appendChild sh
' ===========================
' ITERATE THROUGH ROWS ' A2:NNn
' ===========================
For i = 2 To ws.UsedRange.Rows.Count ' Sheets(1)
' DATA ROW NODE '
Set dataNode = doc.createElement("row") '
sh.appendChild dataNode
' TABLES NODE (orig.: DATES NODE) '
Set datesNode = doc.createElement(ws.Cells(1, 1)) ' Dates
datesNode.Text = ws.Range("A" & i)
dataNode.appendChild datesNode
' NAMES NODE '
For j = 1 To ws.UsedRange.Columns.Count - 1 ' = 12
tit = ws.Cells(1, j + 1)
tmpValue = ws.Cells(i, j + 1)
Set namesnode = doc.createElement(tit)
namesnode.Text = tmpValue
dataNode.appendChild namesnode
Next j
Next i
Next ws
' =============================
' PRETTY PRINT RAW OUTPUT (XSL)
' =============================
xslDoc.LoadXML "<?xml version=" & Chr(34) & "1.0" & Chr(34) & "?>" _
& "<xsl:stylesheet version=" & Chr(34) & "1.0" & Chr(34) _
& " xmlns:xsl=" & Chr(34) & "http://www.w3.org/1999/XSL/Transform" & Chr(34) & ">" _
& "<xsl:strip-space elements=" & Chr(34) & "*" & Chr(34) & " />" _
& "<xsl:output method=" & Chr(34) & "xml" & Chr(34) & " indent=" & Chr(34) & "yes" & Chr(34) & "" _
& " encoding=" & Chr(34) & "UTF-8" & Chr(34) & "/>" _
& " <xsl:template match=" & Chr(34) & "node() | #*" & Chr(34) & ">" _
& " <xsl:copy>" _
& " <xsl:apply-templates select=" & Chr(34) & "node() | #*" & Chr(34) & " />" _
& " </xsl:copy>" _
& " </xsl:template>" _
& "</xsl:stylesheet>"
' XSLT (Transformation)
xslDoc.async = False
doc.transformNodeToObject xslDoc, newDoc
' =================
' Save the XML File
' =================
newDoc.Save ThisWorkbook.Path & "\Output.xml"
MsgBox "Successfully exported Excel data to " & ThisWorkbook.Path & "\Output.XML!", vbInformation
' Regular End of procedure
Exit Sub
ErrHandle:
MsgBox Err.Number & " - " & Err.Description, vbCritical
Exit Sub
End Sub
Note
Sheet names have to be without spaces
Added Note (important hint):
XML Nodes use titles in first row of every sheet. As the modified procedure gets title names via UsedRange it's important not to have any empty cells in row A:A for this example.
Additional remark
I don't know the reason why my prompt answer (marked as "a") was downgraded by someone. I would find it helpful to argue this :-)
I need to create a macro (which I have never done before) and if you guys can guide me to a right path, it would be really appreciated.
What I'm doing currently:
I have created a mapping XML which I have imported into Excel. Once it is imported into Excel, users will then go ahead and paste some data in it and export it to receive an XML data file, which then user can drop it to a FTP where the job picks it up and imports it into database.
Here's the problem:
The export has following node, which I do not want:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Root xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
Instead I want to replace it with following:
<?xml version="1.0" ?>
<Root xmlns="http://tempuri.org/CourseImport.xsd">
How do I automate this? Is there some kind of setting in Excel that could make it happen?
Basically, I want the export to have my root instead of the default root and I want to automatically be able to drop the file to specified path: example: \development\school\course import
Thanks!
My co-worker actually helped me out with this. I thought I should share what I did
Sub ExportXML()
'
' Export XML Macro exports the data that is in Excel to XML.
'
Const ForReading = 1
Const ForWriting = 2
Set objFSO = CreateObject("Scripting.FileSystemObject")
'
newFileName = Application.GetSaveAsFilename("out.xml", "XML Files (*.xml), *.xmls")
If newFileName = False Then
Exit Sub
End If
If objFSO.FileExists(newFileName) Then
objFSO.DeleteFile (newFileName)
End If
ActiveWorkbook.XmlMaps("Root_Map").Export URL:=newFileName
Set objFile = objFSO.OpenTextFile(newFileName, ForReading)
Dim count
count = 0
Do Until objFile.AtEndOfStream
strLine = objFile.ReadLine
If count = 0 Then
strNewContents = strNewContents & "<?xml version=""1.0"" ?>" & vbCrLf
ElseIf count = 1 Then
strNewContents = strNewContents & "<Root xmlns=""http://tempuri.org/import.xsd"">" & vbCrLf
Else
strNewContents = strNewContents & strLine & vbCrLf
End If
count = count + 1
Loop
objFile.Close
Set objFile = objFSO.OpenTextFile(newFileName, ForWriting)
objFile.Write strNewContents
objFile.Close
End Sub