XML combine documents

XML combine documents - groovy

I am working on Boomi interface and I need to combine individual xml documents in to single output documents . The standard combine document step is not working properly.
All xml documents are of same structure .
First Document
<?xml version='1.0' encoding='UTF-8'?>
<EMPLEADOS>
<EMPLEADO TIPO="A" NUMERO="123">
<PROCESO PERIODO="201603" TT="MN" PAC="9999" />
<SECCION ID="ETACIV">
<CAMPO ID="ETA_ETCNOM" SEC=" " FECHA=" ">abc</CAMPO>
</SECCION>
</EMPLEADO>
</EMPLEADOS>
Second document
<?xml version='1.0' encoding='UTF-8'?>
<EMPLEADOS>
<EMPLEADO TIPO="A" NUMERO="123">
<PROCESO PERIODO="201603" TT="MN" PAC="9999" />
<SECCION ID="SADMIN ">
<CAMPO ID="SAD_SADESO" SEC=" " FECHA="01/03/2015">01/03/2015</CAMPO>
</SECCION>
</EMPLEADO>
</EMPLEADOS>
Third document
<?xml version='1.0' encoding='UTF-8'?>
<EMPLEADOS>
<EMPLEADO TIPO="A" NUMERO="123">
<PROCESO PERIODO="201603" TT="MN" PAC="9999" />
<SECCION ID="SADMIN ">
<CAMPO ID="SAD_SADESO" SEC=" " FECHA="01/06/2015">01/06/2015</CAMPO>
</SECCION>
</EMPLEADO>
</EMPLEADOS>
Expected output
<?xml version='1.0' encoding='UTF-8'?>
<EMPLEADOS>
<EMPLEADO TIPO="A" NUMERO="123">
<PROCESO PERIODO="201603" TT="MN" PAC="9999" />
<SECCION ID="ETACIV">
<CAMPO ID="ETA_ETCNOM" SEC=" " FECHA=" ">abc</CAMPO>
</SECCION>
<SECCION ID="SADMIN ">
<CAMPO ID="SAD_SADESO" SEC=" " FECHA="01/03/2015">01/03/2015</CAMPO>
<CAMPO ID="SAD_SADESO" SEC=" " FECHA="01/06/2015">01/06/2015</CAMPO>
</SECCION>
</EMPLEADO>
</EMPLEADOS>
The merging on elements are the same as attributes ? Technically , I need all the documents to be merged on ID attribute of CAMPO .
Any help greatly appreciated .
Thanks
Nag
I tried the below code ; getting Premature end of file. error.
import java.util.Properties;
import java.io.InputStream;
import org.jdom.input.SAXBuilder;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.xpath.XPath;
import org.jdom.output.XMLOutputter;
import groovy.util.slurpersupport.GPathResult;
import groovy.xml.StreamingMarkupBuilder;
for( int i = 0; i < dataContext.getDataCount(); i++ ) {
InputStream is = dataContext.getStream(i);
Properties props = dataContext.getProperties(i);
def xs = new XmlSlurper()
def employee = xs.parse(is);
String Encod = "UTF-8" ;
HashMap<String, GPathResult> CampoMap = new HashMap<String, GPathResult>()
employee.EMPLEADOS.EMPLEADO.PROCESO.SECCION.CAMPO.each {
CampoMap["${it.#ID}"] = it
}
new StreamingMarkupBuilder().bind {
mkp.xmlDeclaration(["version":"1.0", "encoding":"UTF-8"]);
EMPLEADOS {
EMPLEADO.PROCESO.SECCION.each {
if (CampoMap["${it.#ID}"] != null) {
it.appendNode(CampoMap["${it.#id}"].sites)
}
out << it
}
}
} .writeTo(is.newWriter(Encod))
}
dataContext.storeStream(is, props);
The new code is
import groovy.util.XmlParser
import groovy.xml.MarkupBuilder
def parser = new XmlParser()
def writer = new StringWriter()
def builder = new MarkupBuilder(writer)
for( int i = 0; i < dataContext.getDataCount(); i++ ) {
InputStream is = dataContext.getStream(i);
Properties props = dataContext.getProperties(i);
def mergedDocument = (0..<dataContext.dataCount)
.collect { XmlParser.parse(dataContext.getStream(it)) }
.inject { nodeA, nodeB -> merge(nodeA, nodeB) }
builder.mkp.xmlDeclaration(version:'1.0', encoding:'UTF-8')
builder.EMPLEADOS {
doc1.EMPLEADO.each { empleado ->
EMPLEADO(empleado.attributes()) {
empleado.PROCESO.each { proceso ->
PROCESO(proceso.attributes())
}
empleado.SECCION.each { seccion ->
SECCION(seccion.attributes()) {
seccion.CAMPO.each { campo ->
CAMPO(campo.attributes(), campo.value().head())
}
}
}
}
}
}
is = mergedDocument ;
}
/*
* Category to simplify XML node comparisons.
* Basically, two Nodes are equal if their attributes are the same.
*/
// class NodeCategory {
// static boolean equals(Node me, Node other) {
// me.attributes() == other.attributes()
// }
// static boolean isCase(List<Node> nodes, Node other) {
// nodes.find { it == other } != null
// }
//}
/*
* Merges document b into document a.
* WARNING: This method is destructive; it modifies document a
* #Returns a, for convenience
*/
def merge(a, b) {
// use(NodeCategory) {
b.EMPLEADO.each { empleado ->
def existingEmpleado = a.EMPLEADO.find {
it == empleado
}
if(existingEmpleado) {
// Empleado already exists, must merge differences.
// Add any missing PROCESO nodes.
empleado.PROCESO
.findAll { !(it in existingEmpleado.PROCESO) }
.with {
delegate.each { existingEmpleado.append(it) }
}
// Add any missing SECCION nodes.
empleado.SECCION
.findAll { !(it in existingEmpleado.SECCION) }
.with {
delegate.each { existingEmpleado.append(it) }
}
// Add any missing CAMPO nodes.
empleado.SECCION.each { seccion ->
existingEmpleado.SECCION
.find { it == seccion }
.with {
seccion.CAMPO
.findAll { !(it in delegate.CAMPO) }
.each { delegate.append(it) }
}
}
} else {
// Empleado does not exist, go ahead and add it as-is.
a.append(empleado)
}
}
// }
return a
}

First, I should mention that a generic method for combining XML documents is impossible because the merging process is contextual. How XML nodes are merged is dependent on what the nodes mean. A computer is incapable of figuring out the meaning on your data, so you as the programmer have to provide the instructions. Having said that, here's how to merge YOUR XML documents.
import groovy.util.XmlParser
import groovy.xml.MarkupBuilder
def parser = new XmlParser()
def writer = new StringWriter()
def builder = new MarkupBuilder(writer)
def doc1 = parser.parseText('''<?xml version='1.0' encoding='UTF-8'?>
<EMPLEADOS>
<EMPLEADO TIPO="A" NUMERO="123">
<PROCESO PERIODO="201603" TT="MN" PAC="9999" />
<SECCION ID="ETACIV">
<CAMPO ID="ETA_ETCNOM" SEC=" " FECHA=" ">abc</CAMPO>
</SECCION>
</EMPLEADO>
</EMPLEADOS>''')
def doc2 = parser.parseText('''<?xml version='1.0' encoding='UTF-8'?>
<EMPLEADOS>
<EMPLEADO TIPO="A" NUMERO="123">
<PROCESO PERIODO="201603" TT="MN" PAC="9999" />
<SECCION ID="SADMIN ">
<CAMPO ID="SAD_SADESO" SEC=" " FECHA="01/03/2015">01/03/2015</CAMPO>
</SECCION>
</EMPLEADO>
</EMPLEADOS>''')
def doc3 = parser.parseText('''<?xml version='1.0' encoding='UTF-8'?>
<EMPLEADOS>
<EMPLEADO TIPO="A" NUMERO="123">
<PROCESO PERIODO="201603" TT="MN" PAC="9999" />
<SECCION ID="SADMIN ">
<CAMPO ID="SAD_SADESO" SEC=" " FECHA="01/06/2015">01/06/2015</CAMPO>
</SECCION>
</EMPLEADO>
</EMPLEADOS>''')
merge(doc1, doc2)
merge(doc1, doc3)
builder.mkp.xmlDeclaration(version:'1.0', encoding:'UTF-8')
builder.EMPLEADOS {
doc1.EMPLEADO.each { empleado ->
EMPLEADO(empleado.attributes()) {
empleado.PROCESO.each { proceso ->
PROCESO(proceso.attributes())
}
empleado.SECCION.each { seccion ->
SECCION(seccion.attributes()) {
seccion.CAMPO.each { campo ->
CAMPO(campo.attributes(), campo.value().head())
}
}
}
}
}
}
println writer
/*
* Category to simplify XML node comparisons.
* Basically, two Nodes are equal if their attributes are the same.
*/
class NodeCategory {
static boolean equals(Node me, Node other) {
me.attributes() == other.attributes()
}
static boolean isCase(List<Node> nodes, Node other) {
nodes.find { it == other } != null
}
}
/*
* Merges document b into document a.
* WARNING: This method is destructive; it modifies document a
* #Returns a, for convenience
*/
def merge(a, b) {
use(NodeCategory) {
b.EMPLEADO.each { empleado ->
def existingEmpleado = a.EMPLEADO.find {
it == empleado
}
if(existingEmpleado) {
// Empleado already exists, must merge differences.
// Add any missing PROCESO nodes.
empleado.PROCESO
.findAll { !(it in existingEmpleado.PROCESO) }
.with {
delegate.each { existingEmpleado.append(it) }
}
// Add any missing SECCION nodes.
empleado.SECCION
.findAll { !(it in existingEmpleado.SECCION) }
.with {
delegate.each { existingEmpleado.append(it) }
}
// Add any missing CAMPO nodes.
empleado.SECCION.each { seccion ->
existingEmpleado.SECCION
.find { it == seccion }
.with {
seccion.CAMPO
.findAll { !(it in delegate.CAMPO) }
.each { delegate.append(it) }
}
}
} else {
// Empleado does not exist, go ahead and add it as-is.
a.append(empleado)
}
}
}
return a
}
The process goes like this:
The merge(Node a, Node b) method traverses through the nodes, handling each case so that a ends up being the combination of both documents (node trees). It's based on figuring out whether a node in b is already in a. If not, the node is added as-is. Otherwise, merge the changes a accordingly. Yes, this method is butt ugly and was a real PITA to write. Please, for the sake of sound programming, refactor the beast.
Finally, a MarkupDocumentBuilder is used to process the final node tree and produce a serialized XML document.
You may notice there's a Groovy category involved. It's used to simplify the Node comparisons.
Addendum - Using input streams
You can invoke the same process using InputStreams as the source of XML documents. It would go something like this:
def parser = new XmlParser()
def mergedDocument = (0..<dataContext.dataCount)
.collect { parser.parse(dataContext.getStream(it) }
.inject { nodeA, nodeB -> merge(nodeA, nodeB) }
Then, you can process mergedDocument with the MarkupBuilder.

Related

How to iterate into the collection of class objects in node.js

I have two classes one and two
class One {
constructor(field1, field2) {
this.field1 = field1;
this.field2 = field2;
}
}
module.exports = one;
class Two {
constructor(field11, field22, list) {
this.field11 = field11;
this.field22 = field22;
this.list = list;
}
add(one) {
this.list.push(one);
}
}
module.exports = Two;
Third class imports both classes
const one= require('./one.js');
const two= require('./two.js');
Now, I have a function which creates an object of class two and add some values like,
two = new two();
two.add(new one(1,1000));
two.add(new one(2,2000));
console.log(two.list);
////list is a collection of class one object
Till this point is working fine, I am getting collection
My query is how to iterate through collection
like, I want to access
two.list[0].field1
// not getting the property field1

Try this:
class One {
constructor(field1, field2) {
this.field1 = field1; this.field2 = field2;
}
}
class Two {
constructor(field11, field22, list = []) {
this.field11 = field11; this.field22 = field22;
this.list = list
}
add(one) {
this.list.push(one);
}
}
two = new Two();
two.add(new One(1, 1000));
two.add(new One(2, 2000));
console.log(two.list);

There are some issues in code:
Naming and bracket is not closing correct
Default list parameter is also written in wrong format
class One {
constructor(field1, field2) {
this.field1 = field1;
this.field2 = field2;
}
}
class Two {
constructor(field11, field22, list = []) {
this.field11 = field11;
this.field22 = field22;
this.list = list;
}
add(one) {
this.list.push(one);
}
}
two = new Two();
two.add(new One(1,1000));
two.add(new One(2,2000));
console.log(two.list[0].field1);
Updated your code. Try running it

How to read Html files by using XmlReader

I want to read HTML file by using XMLReader. So I wrote some codes, but it throws only XmlException. So please give me any suggestion about how to read the HTML file (and tags) line by line using C#.
public class HtmlReader
{
public List<HtmlDocument> Read(string path)
{
List<HtmlDocument> html = new List<HtmlDocument>();
HtmlDocument h1 = new HtmlDocument();
using (XmlReader reader = XmlReader.Create(path.ToString()))
{
try
{
while (reader.Read())
{
if (reader.IsStartElement())
{
if (reader.Name == "title" || reader.Name == "body")
{
switch (reader.Name)
{
case "title":
if (reader.Read())
{
h1.Title = reader.Value.Trim();
}
break;
case "body":
if (reader.Read())
{
}
break;
}
}
}
}
}
catch(XmlException)
{
}
}
return html;
}
}
}
class Program
{
static void Main(string[] args)
{
HtmlReader readerObject = new HtmlReader();
List<HtmlDocument> employeeCollection = readerObject.Read("E:/workoutsPrograms/ConsoleApplication4/Table.html");
}
}
I tried this, but I was not able to read the Html tags line by line. Apart from my expectation, it throws only Exception.

I find the answer for above question. Following codes you can use.
XmlReaderSettings settings = new XmlReaderSettings();
settings.DtdProcessing = DtdProcessing.Ignore;
settings.IgnoreWhitespace = true;
XmlRederSettings is enabled set features of XmlReader. In that Html file have DOCTYPe, For that avoid we want use the Dtdprocessing.Ignore.

Split an XML file into multiple files

Suppose I have a following XML file:
<a>
<b>
....
</b>
<b>
....
</b>
<b>
....
</b>
</a>
I want split this file into multiple XML files based on the number of <b> tags.
Like:
File01.xml
<a>
<b>
....
</b>
</a>
File02.xml
<a>
<b>
....
</b>
</a>
File03.xml
<a>
<b>
....
</b>
</a>
And so on...
I'm new to Groovy and I tried with the following piece of code.
import java.util.HashMap
import java.util.List
import javax.xml.parsers.DocumentBuilderFactory
import org.custommonkey.xmlunit.*
import org.w3c.dom.NodeList
import javax.xml.xpath.*
import javax.xml.transform.TransformerFactory
import org.w3c.dom.*
import javax.xml.transform.dom.DOMSource
import javax.xml.transform.stream.StreamResult
class file_split {
File input = new File("C:\\file\\input.xml")
def dbf = DocumentBuilderFactory.newInstance().newDocumentBuilder()
def doc = new XmlSlurper(dbf).parse(ClassLoader.getSystemResourceAsStream(input));
def xpath = XPathFactory.newInstance().newXPath()
NodeList nodes = (NodeList) xpath.evaluate("//a/b", doc, XPathConstants.NODESET)
def itemsPerFile = 5;
def fileNumber = 0;
def currentdoc = dbf.newDocument()
def rootNode = currentdoc.createElement("a")
def currentFile = new File(fileNumber + ".xml")
try{
for(i = 1; i <= nodes.getLength(); i++){
def imported = currentdoc.importNode(nodes.item(i-1), true)
rootNode.appendChild(imported)
if(i % itemsPerFile == 0){
writeToFile(rootNode, currentFile)
rootNode = currentdoc.createElement("a");
currentFile = new File((++fileNumber)+".xml");
}
}
}
catch(Exception ex){
logError(file.name,ex.getMessage());
ex.printStackTrace();
}
def writeToFile(Node node, File file) throws Exception {
def transformer = TransformerFactory.newInstance().newTransformer();
transformer.transform(new DOMSource(node), new StreamResult(new FileWriter(file)));
}
}
Any help would be greatly appreciated.

This should work:
import groovy.xml.*
new XmlSlurper().parseText( file ).b.eachWithIndex { element, index ->
new File( "/tmp/File${ "${index+1}".padLeft( 2, '0' ) }.xml" ).withWriter { w ->
w << XmlUtil.serialize( new StreamingMarkupBuilder().bind {
a {
mkp.yield element
}
} )
}
}
If you want to group them, you can use collate (this example groups 2 b tags per file:
import groovy.xml.*
new XmlSlurper().parseText( file )
.b
.toList()
.collate( 2 )
.eachWithIndex { elements, index ->
new File( "/tmp/File${ "${index+1}".padLeft( 2, '0' ) }.txt" ).withWriter { w ->
w << XmlUtil.serialize( new StreamingMarkupBuilder().bind {
a {
elements.each { element ->
mkp.yield element
}
}
} )
}
}

I don't know what problem you are experiencing, but it seems like your creating a new rootNode when needed, but not a new currentdoc. Try to reinitialize the currentdoc right before you reinitialize the rootNode in your loop.

Groovy: Accessing Closure Object's elements

I am new to Groovy, and was wondering:
If I define a object like this:
def buildParentXML(){
def parentXMLElement = {
ParentElement {
CreationDate(new Date())
out << buildChildXML()
ChildElementFK(buildChildXML().childElement.ChildPK) //Something like this
}
}
}
def buildChildXML() {
def childElement {
ChildPK("12345679")
Operator("Don't Know")
}
}
How would I access the value of Element1 or Element2?
I tried
println obj.RootElement.Element1
println obj[RootElement].[Element1]
println obj['RootElement'].['Element1']
Simple Example
<SavePolicy>
<Segment>
<IssueState>AK</IssueState>
<OptionCode>ADD</OptionCode>
<SegmentStatus>Aive</SegmentStatus>
<ApplicationReceivedDate>09/17/2013</ApplicationReceivedDate>
<ApplicationSignedDate>09/17/2013</ApplicationSignedDate>
<CreationDate>09/17/2013</CreationDate>
<EffeiveDate>09/17/2013</EffeiveDate>
<IssueDate>09/17/2013</IssueDate>
<TerminationDate>09/17/2013</TerminationDate>
<RateSeriesDate>09/17/2013</RateSeriesDate>
</Segment>
<Life>
<FaceAmount>250.00</FaceAmount>
</Life>
Will Be converted into
<?xml version="1.0" encoding="UTF-8"?>
<SEGRequestVO>
<Service>Policy</Service>
<Operation>submit</Operation>
<Operator>N/A</Operator>
<IgnoreEditWarningsNF/>
<RequestParameters>
<SubmissionType>SaveIt</SubmissionType>
<ContraNumber/>
<SegmentVO>
<IssueState>AK</IssueState>
<OptionCode>DD</OptionCode>
<SegmentStatus>Aive</SegmentStatus>
<ApplicationReceivedDate>09/17/2013</ApplicationReceivedDate>
<ApplicationSignedDate>09/17/2013</ApplicationSignedDate>
<CreationDate>09/17/2013</CreationDate>
<EffeiveDate>09/17/2013</EffeiveDate>
<IssueDate>09/17/2013</IssueDate>
<TerminationDate>09/17/2013</TerminationDate>
<RateSeriesDate>09/17/2013</RateSeriesDate>
<ContraNumber/>
<ProduStruureFK>01</ProduStruureFK>
<LifeVO>
<FaceAmount>250.00</FaceAmount>
<LifePK>-123464646</LifePK>
<SegmentFK/>
</LifeVO></SegmentVO>
</RequestParameters>
</SEGRequestVO>

Right, I took a wild guess...is this what you mean?
import groovy.xml.*
def buildChildXML = {
ChildPK("12345679")
Operator("Don't Know")
return "12345679"
}
def buildParentXML = {
ParentElement {
CreationDate(new Date())
def pk = buildChildXML()
ChildElementFK( pk )
}
}
println XmlUtil.serialize( new StreamingMarkupBuilder().bind { it ->
buildParentXML.delegate = it
buildChildXML.delegate = it
buildParentXML()
} )
That prints:
<?xml version="1.0" encoding="UTF-8"?><ParentElement>
<CreationDate>Mon Sep 16 17:02:42 BST 2013</CreationDate>
<ChildPK>12345679</ChildPK>
<Operator>Don't Know</Operator>
<ChildElementFK>12345679</ChildElementFK>
</ParentElement>

replace XmlSlurper tag with arbitrary XML

I am trying to replace specific XmlSlurper tags with arbitrary XML strings. The best way I have managed to come up with to do this is:
#!/usr/bin/env groovy
import groovy.xml.StreamingMarkupBuilder
def page=new XmlSlurper(new org.cyberneko.html.parsers.SAXParser()).parseText("""
<html>
<head></head>
<body>
<one attr1='val1'>asdf</one>
<two />
<replacemewithxml />
</body>
</html>
""".trim())
import groovy.xml.XmlUtil
def closure
closure={ bind,node->
if (node.name()=="REPLACEMEWITHXML") {
bind.mkp.yieldUnescaped "<replacementxml>sometext</replacementxml>"
} else {
bind."${node.name()}"(node.attributes()) {
mkp.yield node.text()
node.children().each { child->
closure(bind,child)
}
}
}
}
println XmlUtil.serialize(
new StreamingMarkupBuilder().bind { bind->
closure(bind,page)
}
)
However, the only problem is the text() element seems to capture all child text nodes, and thus I get:
<?xml version="1.0" encoding="UTF-8"?>
<HTML>asdf<HEAD/>
<BODY>asdf<ONE attr1="val1">asdf</ONE>
<TWO/>
<replacementxml>sometext</replacementxml>
</BODY>
</HTML>
Any ideas/help much appreciated.
Thank you!
Misha
p.s. Also, out of curiosity, if I change the above to the "Groovier" notation as follows, the groovy compiler thinks I am trying to access the ${node.name()} member of my test class. Is there a way to specify this is not the case while still not passing the actual builder object? Thank you! :)
def closure
closure={ node->
if (node.name()=="REPLACEMEWITHXML") {
mkp.yieldUnescaped "<replacementxml>sometext</replacementxml>"
} else {
"${node.name()}"(node.attributes()) {
mkp.yield node.text()
node.children().each { child->
closure(child)
}
}
}
}
println XmlUtil.serialize(
new StreamingMarkupBuilder().bind {
closure(page)
}
)

Ok here is what I came up with:
#!/usr/bin/env groovy
import groovy.xml.StreamingMarkupBuilder
import groovy.xml.XmlUtil
def printSlurper={page->
println XmlUtil.serialize(
new StreamingMarkupBuilder().bind { bind->
mkp.yield page
}
)
}
def saxParser=new org.cyberneko.html.parsers.SAXParser()
saxParser.setFeature('http://xml.org/sax/features/namespaces',false)
saxParser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment",true)
def string="TEST"
def middleClosureHelper={ builder->
builder."${string}" {
mkp.yieldUnescaped "<inner>XML</inner>"
}
}
def middleClosure={
MiddleClosure {
middleClosureHelper(delegate)
}
}
def original=new XmlSlurper(saxParser).parseText("""
<original>
<middle>
</middle>
</original>
""")
original.depthFirst().find { it.name()=='MIDDLE' }.replaceNode { node->
mkp.yield middleClosure
}
printSlurper(original)
assert original.depthFirst().find { it.name()=='INNER' } == null
def modified=new XmlSlurper(saxParser).parseText(new StreamingMarkupBuilder().bind {mkp.yield original}.toString())
assert modified.depthFirst().find { it.name()=='INNER' } != null
You have to reload the slurper, but it works!
Misha

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

XML combine documents - groovy

Related

How to iterate into the collection of class objects in node.js

How to read Html files by using XmlReader

Split an XML file into multiple files

Groovy: Accessing Closure Object's elements

replace XmlSlurper tag with arbitrary XML

Categories

Resources