Xml source This article contains example code that shows the usage of the XmlSource component.
The Xml source let you load data in xml format from various sources - either from a file, a web service or from an Azure blob.
Shared code# The following method is used in the subsequent examples, and prints the content of a xml file on the console output.
private void PrintFile ( string sourceFile ) {
Console . WriteLine ( $"Content of file '{Path.GetFileName(sourceFile)}'" );
Console . WriteLine ( "---" );
Console . WriteLine ( File . ReadAllText ( sourceFile ));
Console . WriteLine ( "---" );
}
Read xml file# With POCO, only Elements# Let’s start with an xml file that contains only elements. We can read this into a memory destination using a POCO (Plain old component object).
public class Element
{
public SubElement Inner { get ; set ; }
public int Id { get ; set ; }
}
public class SubElement
{
public string Value { get ; set ; }
public decimal Number { get ; set ; }
}
string sourceFile = "res/Examples/Elements.xml" ;
PrintFile ( sourceFile );
var source = new XmlSource < Element >() {
ResourceType = ResourceType . File ,
Uri = sourceFile
};
var dest = new MemoryDestination < Element >();
source . LinkTo ( dest );
Network . Execute ( source );
foreach ( var row in dest . Data )
Console . WriteLine ( $"Received Id: {row.Id}, Value1: {row.Inner.Value}, Value2: {row.Inner.Number}" );
/* Output:
Content of file 'Elements.xml'
---
<?xml version="1.0" encoding="utf-8"?>
<Root>
<CreationDate>2022-05-01</CreationDate>
<Element>
<Id>1</Id>
<Inner>
<Value>Test1</Value>
<Number>1.1</Number>
</Inner>
</Element>
<Element>
<Id>2</Id>
<Inner>
<Value></Value>
<Number>1.2</Number>
</Inner>
</Element>
<Element>
<Id>3</Id>
<Inner>
<Value>Test3</Value>
<Number>1.3</Number>
</Inner>
</Element>
<Confidential>false</Confidential>
</Root>
---
Received Id: 1, Value1: Test1, Value2: 1.1
Received Id: 2, Value1: , Value2: 1.2
Received Id: 3, Value1: Test3, Value2: 1.3
*/
Elements and attributes mixed# If attributes and elements are mixed in your source xml, you can use the Xml attributes provided by the System.Xml.Serialization
namespace.
[XmlRoot("Element")]
public class ElementWA
{
[XmlElement("Inner")]
public SubElementWA InnerElement { get ; set ; }
[XmlAttribute("Id")]
public int IdAttr { get ; set ; }
}
public class SubElementWA
{
public string Value { get ; set ; }
[XmlAttribute("Number")]
public decimal Number { get ; set ; }
}
string sourceFile = "res/Examples/ElementsAndAttributes.xml" ;
PrintFile ( sourceFile );
var source = new XmlSource < ElementWA >() {
ResourceType = ResourceType . File ,
Uri = sourceFile
};
var dest = new MemoryDestination < ElementWA >();
source . LinkTo ( dest );
Network . Execute ( source );
foreach ( var row in dest . Data )
Console . WriteLine ( $"Received Id: {row.IdAttr}, Value1: {row.InnerElement.Value}, Value2: {row.InnerElement.Number}" );
/* Output:
Content of file 'ElementsAndAttributes.xml'
---
<?xml version="1.0" encoding="utf-8"?>
<Root>
<CreationDate>2022-05-01</CreationDate>
<Element Id="1">
<Inner Number="1.1">
<Value>Test1</Value>
</Inner>
</Element>
<Element Id="2">
<Inner Number="1.2">
<Value></Value>
</Inner>
</Element>
<Element Id="3">
<Inner Number="1.3">
<Value>Test3</Value>
</Inner>
</Element>
<Confidential>false</Confidential>
</Root>
---
Received Id: 1, Value1: Test1, Value2: 1.1
Received Id: 2, Value1: , Value2: 1.2
Received Id: 3, Value1: Test3, Value2: 1.3
*/
Reading into dynamic# We can read again the xml file in the previous example, this time without using a POCO but a dynamic ExpandoObject.
string sourceFile = "res/Examples/ElementsAndAttributes.xml" ;
PrintFile ( sourceFile );
var source = new XmlSource () {
ResourceType = ResourceType . File ,
Uri = sourceFile
};
source . ElementName = "Element" ;
source . AttributePrefixForDynamic = "at_" ;
var dest = new MemoryDestination ();
source . LinkTo ( dest );
Network . Execute ( source );
foreach ( dynamic row in dest . Data )
Console . WriteLine ( $"Received Id: {row.at_Id}, Value1: {row.Inner.Value}, Value2: {row.Inner.at_Number}" );
/* Output:
Content of file 'ElementsAndAttributes.xml'
---
<?xml version="1.0" encoding="utf-8"?>
<Root>
<CreationDate>2022-05-01</CreationDate>
<Element Id="1">
<Inner Number="1.1">
<Value>Test1</Value>
</Inner>
</Element>
<Element Id="2">
<Inner Number="1.2">
<Value></Value>
</Inner>
</Element>
<Element Id="3">
<Inner Number="1.3">
<Value>Test3</Value>
</Inner>
</Element>
<Confidential>false</Confidential>
</Root>
---
Received Id: 1, Value1: Test1, Value2: 1.1
Received Id: 2, Value1: , Value2: 1.2
Received Id: 3, Value1: Test3, Value2: 1.3
*/
Different element names# If our source contains not the same element name for our data, but has changing names, we can use the ElementNameRetrievalFunc
to adjust the element before reading the next element.
string sourceFile = "res/Examples/DifferentElementNames.xml" ;
PrintFile ( sourceFile );
var source = new XmlSource () {
ResourceType = ResourceType . File ,
Uri = sourceFile
};
source . ElementNameRetrievalFunc = ( smd ) => {
if (( string ) smd . AdditonalData == "A" ) return "A" ;
else return "B" ;
};
source . AttributePrefixForDynamic = "" ;
var dest = new MemoryDestination ();
source . LinkTo ( dest );
Network . Execute ( source );
foreach ( dynamic row in dest . Data ) {
Console . WriteLine ( $"Received Id: {row.Id}, Value: {row.Value}" );
}
/* Output:
Content of file 'DifferentElementNames.xml'
---
<?xml version="1.0" encoding="utf-8"?>
<Root>
<CreationDate>2022-05-01</CreationDate>
<A Id="1">
<Value>Test1</Value>
</A>
<B Value="Test2">
<Id>2</Id>
</B>
<Skipped Id="0">X</Skipped>
<B Value="Test3">
<Id>3</Id>
</B>
<A Id="4">
<Value>Test4</Value>
</A>
</Root>
---
Received Id: 1, Value: Test1
Received Id: 2, Value: Test2
Received Id: 3, Value: Test3
Received Id: 4, Value: Test4
*/
Redirecting errors# We can use the error linking if we want to ignore flawed data in the source.
public class Element
{
public SubElement Inner { get ; set ; }
public int Id { get ; set ; }
}
public class SubElement
{
public string Value { get ; set ; }
public decimal Number { get ; set ; }
}
string sourceFile = "res/Examples/ElementsWithErrors.xml" ;
PrintFile ( sourceFile );
var source = new XmlSource < Element >() {
ResourceType = ResourceType . File ,
Uri = sourceFile
};
var dest = new MemoryDestination < Element >();
var errorDest = new MemoryDestination < ETLBoxError >();
source . LinkTo ( dest );
source . LinkErrorTo ( errorDest );
Network . Execute ( source );
foreach ( var row in dest . Data )
Console . WriteLine ( $"Received Id: {row.Id}, Value1: {row.Inner.Value}, Value2: {row.Inner.Number}" );
foreach ( var err in errorDest . Data )
Console . WriteLine ( $"Error record: {err.ErrorText} " +
Environment . NewLine +
$"{err.RecordAsJson}" );
/* Output:
Content of file 'ElementsWithErrors.xml'
---
<?xml version="1.0" encoding="utf-8"?>
<Root>
<CreationDate>2022-05-01</CreationDate>
<Element>
<Id>1</Id>
<Inner>
<Value>OK</Value>
<Number>1.1</Number>
</Inner>
</Element>
<Element>
<Id>2</Id>
<Inner>
<Value>ERR</Value>
<Number></Number>
</Inner>
</Element>
<Element>
<Id>3</Id>
<Inner>
<Value>OK</Value>
<Number>1.3</Number>
</Inner>
</Element>
<Element>
<Id>X</Id>
<Inner>
<Value>ERR</Value>
<Number>1.4</Number>
</Inner>
</Element>
<Confidential>false</Confidential>
</Root>
---
Received Id: 1, Value1: OK, Value2: 1.1
Received Id: 3, Value1: OK, Value2: 1.3
Error record: There is an error in XML document (0, 0).
<Element>
<Id>2</Id>
<Inner>
<Value>ERR</Value>
<Number></Number>
</Inner>
</Element>
Error record: There is an error in XML document (0, 0).
<Element>
<Id>X</Id>
<Inner>
<Value>ERR</Value>
<Number>1.4</Number>
</Inner>
</Element>
*/
Reading multiple Uris# You can use the GetNextUri
/HasNextUri
pattern (provided on all streaming connectors) to go through a set a files, web service endpoints or blobs.
The following example shows the usage with files - change the ResourceType
e.g. to Http if you want to read xml formatted data from more than one endpoint.
[XmlRoot("Element")]
public class MyRow
{
[XmlAttribute("Id")]
public int Id { get ; set ; }
public string Value { get ; set ; }
}
string pattern = @"res/Examples/File" ;
PrintFile ( "res/Examples/File1.xml" );
PrintFile ( "res/Examples/File2.xml" );
PrintFile ( "res/Examples/File3.xml" );
var source = new XmlSource < MyRow >();
source . ResourceType = ResourceType . File ;
source . GetNextUri = meta => pattern + ( meta . RequestCount + 1 ) + ".xml" ;
source . HasNextUri = meta => meta . RequestCount < 3 ;
var dest = new MemoryDestination < MyRow >();
source . LinkTo ( dest );
Network . Execute ( source );
foreach ( var row in dest . Data )
Console . WriteLine ( $"Received Id: {row.Id}, Value1: {row.Value}" );
/* Output:
Content of file 'File1.xml'
---
<?xml version="1.0" encoding="utf-8"?>
<Root>
<Element Id="1">
<Value>Test1</Value>
</Element>
<Element Id="2">
<Value>Test2</Value>
</Element>
</Root>
---
Content of file 'File2.xml'
---
<?xml version="1.0" encoding="utf-8"?>
<Root>
<Element Id="3">
<Value>Test3</Value>
</Element>
<Element Id="4">
<Value>Test4</Value>
</Element>
</Root>
---
Content of file 'File3.xml'
---
<?xml version="1.0" encoding="utf-8"?>
<Root>
<Element Id="5">
<Value>Test5</Value>
</Element>
</Root>
---
Received Id: 1, Value1: Test1
Received Id: 2, Value1: Test2
Received Id: 3, Value1: Test3
Received Id: 4, Value1: Test4
Received Id: 5, Value1: Test5
*/
Different resource types# Reading from Azure blob storage# All streaming connectors support reading data from Azure blob storage instead of a file. Here is an example for the XmlSource.
public class Element
{
public SubElement Inner { get ; set ; }
public int Id { get ; set ; }
}
public class SubElement
{
public string Value { get ; set ; }
public decimal Number { get ; set ; }
}
//Upload source file into Azure blob container
string sourceFile = @"res/Examples/Elements.xml" ;
PrintFile ( sourceFile );
string connectionString = AzureStorageHelper . RetrieveConnectionString ();
AzureStorageHelper . DeleteAndCreateContainer ( connectionString , "xml-test" );
BlobContainerClient containerClient = new BlobContainerClient ( connectionString , "xml-test" );
using ( var reader = new FileStream ( sourceFile , FileMode . Open )) {
var resp = containerClient . GetBlobClient ( "Elements.xml" ). Upload ( reader );
}
//Read uploaded file with CsvSource
var source = new XmlSource < Element >( "Elements.xml" );
source . ResourceType = ResourceType . AzureBlob ;
source . AzureBlobStorage . ConnectionString = connectionString ;
source . AzureBlobStorage . ContainerName = "xml-test" ;
var dest = new MemoryDestination < Element >();
source . LinkTo ( dest );
Network . Execute ( source );
foreach ( var row in dest . Data )
Console . WriteLine ( $"Received Id: {row.Id}, Value1: {row.Inner.Value}, Value2: {row.Inner.Number}" );
/* Output:
Content of file 'Elements.xml'
---
<?xml version="1.0" encoding="utf-8"?>
<Root>
<CreationDate>2022-05-01</CreationDate>
<Element>
<Id>1</Id>
<Inner>
<Value>Test1</Value>
<Number>1.1</Number>
</Inner>
</Element>
<Element>
<Id>2</Id>
<Inner>
<Value></Value>
<Number>1.2</Number>
</Inner>
</Element>
<Element>
<Id>3</Id>
<Inner>
<Value>Test3</Value>
<Number>1.3</Number>
</Inner>
</Element>
<Confidential>false</Confidential>
</Root>
---
Received Id: 1, Value1: Test1, Value2: 1.1
Received Id: 2, Value1: , Value2: 1.2
Received Id: 3, Value1: Test3, Value2: 1.3
*/
Reading from web service# All streaming connectors support reading data from any http endpoint. Here is an example for reading data from a mocked WebServer using a POST request.
public class Element
{
public SubElement Inner { get ; set ; }
public int Id { get ; set ; }
}
public class SubElement
{
public string Value { get ; set ; }
public decimal Number { get ; set ; }
}
string sourceFile = "res/Examples/Elements.xml" ;
var server = WireMockServer . Start ();
server
. Given (
Request . Create ()
. WithPath ( "/test" )
. UsingPost ())
. RespondWith (
Response . Create ()
. WithStatusCode ( 200 )
. WithHeader ( "Content-Type" , "text/xml" )
. WithBody ( File . ReadAllText ( sourceFile ))
);
var source = new XmlSource < Element >() {
ResourceType = ResourceType . Http ,
Uri = @ $"http://localhost:{server.Port}/test"
};
source . HttpRequestMessage . Method = HttpMethod . Post ;
source . HttpRequestMessage . Headers . AcceptEncoding . Add (
new System . Net . Http . Headers . StringWithQualityHeaderValue ( "*" ));
source . HttpRequestMessage . Properties . Add ( "Content-Type" , "application/xml" );
var dest = new MemoryDestination < Element >();
source . LinkTo ( dest );
Network . Execute ( source );
Console . WriteLine ( "Request finished with status code:" + source . HttpResponseMessage . StatusCode . ToString ());
foreach ( var row in dest . Data )
Console . WriteLine ( $"Received Id: {row.Id}, Value1: {row.Inner.Value}, Value2: {row.Inner.Number}" );
/* Output:
Content of file 'Elements.xml'
---
<?xml version="1.0" encoding="utf-8"?>
<Root>
<CreationDate>2022-05-01</CreationDate>
<Element>
<Id>1</Id>
<Inner>
<Value>Test1</Value>
<Number>1.1</Number>
</Inner>
</Element>
<Element>
<Id>2</Id>
<Inner>
<Value></Value>
<Number>1.2</Number>
</Inner>
</Element>
<Element>
<Id>3</Id>
<Inner>
<Value>Test3</Value>
<Number>1.3</Number>
</Inner>
</Element>
<Confidential>false</Confidential>
</Root>
---
Request finished with status code:OK
Received Id: 1, Value1: Test1, Value2: 1.1
Received Id: 2, Value1: , Value2: 1.2
Received Id: 3, Value1: Test3, Value2: 1.3
*/
Reading with Namespaces# This example shows how different namespaces in an Xml file can be addressed.
XmlSource < FTableRow > source = new XmlSource < FTableRow >( "res/Examples/namespaces.xml" , ResourceType . File );
var dest = new MemoryDestination < FTableRow >();
source . LinkTo ( dest );
Network . Execute ( source );
PrintFile ( "res/Examples/namespaces.xml" );
foreach ( var row in dest . Data )
Console . WriteLine ( $"Received: {row.Name} - {row.Width}x{row.Length}x{row.Height}" );
//Output
/*
Content of file 'namespaces.xml'
---
<?xml version="1.0" encoding="utf-8" ?>
<root>
<h:table xmlns:h="http://www.w3.org/TR/html4/">
<h:tr>
<h:td class="green">Apples</h:td>
<h:td class="yellow">Bananas</h:td>
</h:tr>
</h:table>
<f:table xmlns:f="https://www.w3schools.com/furniture" name="Coffee table">
<f:width>80</f:width>
<f:length>120</f:length>
</f:table>
<h:table xmlns:h="http://www.w3.org/TR/html4/">
<h:tr>
<h:td class="yellow">Lemons</h:td>
<h:td>Oranges</h:td>
</h:tr>
</h:table>
<h:table xmlns:h="http://www.w3.org/TR/html4/">
<h:tr>
<h:td class="green">Kiwis</h:td>
<h:td class="black">Grapes</h:td>
</h:tr>
</h:table>
<f:table xmlns:f="https://www.w3schools.com/furniture" name="Dining table">
<f:width>100</f:width>
<f:length>200</f:length>
</f:table>
<f:table xmlns:f="https://www.w3schools.com/furniture" name="Sofa">
<f:width>400</f:width>
<f:height>50</f:height>
</f:table>
</root>
---
Received: Coffee table - 80x120x
Received: Dining table - 100x200x
Received: Sofa - 400xx50
*/
Adding new column during Extraction# This example shows how a new column can be added to the extracted xml data. This example will also read only data on a particular namespace.
[XmlRoot(ElementName = "Main", Namespace = "urn:com.emp/empqueue")]
public class XmlDataAddCol {
public int EmpID { get ; set ; }
[XmlElement("Emp_Name")]
public string Name { get ; set ; }
[XmlElement("Emp_Address")]
public string Address { get ; set ; }
//This is the new column that we want to add,
//which doesn't exists in the source xml file
public string NewColumn { get ; set ; }
}
PrintFile ( "res/Examples/input.xml" );
var source = new XmlSource < XmlDataAddCol >( "res/Examples/input.xml" , ETLBox . ResourceType . File );
source . ElementName = "ag:Main" ;
var rowTrans = new RowTransformation < XmlDataAddCol >();
rowTrans . TransformationFunc = row => {
row . NewColumn = "Additional data for id " + row . EmpID ;
return row ;
};
var dest = new DataFrameDestination < XmlDataAddCol >();
source . LinkTo ( rowTrans );
rowTrans . LinkTo ( dest );
Network . Execute ( source );
Console . WriteLine ( dest . DataFrame . ToString ());
/* Output
Content of file 'input.xml'
---
<?xml version="1.0" encoding="UTF-8"?>
<ag:Emp_queue xmlns:ag="urn:com.emp/empqueue" xmlns:xsi="http://www.w3.org/2001/xmlschema-instance">
<ag:Main>
<ag:EmpID>1002</ag:EmpID>
<ag:Emp_Name>Peter</ag:Emp_Name>
<ag:Emp_Address>III</ag:Emp_Address>
</ag:Main>
<ag:Employee>
<ag:Name>SS2</ag:Name>
<ag:JoinDate>2021-10-12</ag:JoinDate>
<ag:Position>JJ2</ag:Position>
</ag:Employee>
<ag:Employee>
<ag:Name>SS12</ag:Name>
<ag:JoinDate>2020-10-12</ag:JoinDate>
<ag:Position>JJX</ag:Position>
</ag:Employee>
<ag:Main>
<ag:EmpID>1003</ag:EmpID>
<ag:Emp_Name>Fritz</ag:Emp_Name>
<ag:Emp_Address>IV</ag:Emp_Address>
</ag:Main>
<ag:Employee>
<ag:Name>SS13</ag:Name>
<ag:JoinDate>2020</ag:JoinDate>
<ag:Position>JJX</ag:Position>
</ag:Employee>
</ag:Emp_queue>
---
EmpID Name Address NewColumn
1002 Peter III Additional data for id 1002
1003 Fritz IV Additional data for id 1003
*/