Data Migration From Multilevel Xml To Single Table With Ssis
Goal I am attempting to migrate data from a multi-layered XML file with nested elements to a single table. System Parameters MS SQL Server Management Studio Microsoft Visual Studi
Solution 1:
Thx for the actual XML! The following query will get your values out of the XML. It will generate IDs for them to store all data in related tables.
Attention: I had to double the '
sign in woman's
and I added a second person
to show the approach:
DECLARE@x XML='<?xml version="1.0" encoding="UTF-8" ?>
<People>
<Person>
<FirstName>Eliza</FirstName>
<LastName>Ablovatski</LastName>
<Biography>
<![CDATA[<p>Eliza Ablovatski joined the Kenyon history department in 2003, after graduate work in East Central European history at Columbia University and research and fellowships in Munich and Berlin, Germany and Budapest, Hungary. She teaches classes on Europe from 1500 to the present, focusing on the nineteenth and twentieth centuries, Germany, Russia, the Habsburg Monarchy, film, nationalism and identity, gender, race, and the interwar period.</p>
<p>Her dissertation and first book, <em>Revolution and Political Violence in Central Europe: The Deluge of 1919</em> (forthcoming from Cambridge University Press), focus on the revolutionary upheavals in Munich and Budapest following the First World War, and their relationship to political violence and antisemitism. She is currently researching the occupation of Austria (1945-1955) at the end of the Second World War, and the nuclear idea in postwar Europe. She has also researched and written extensively on the history of Jews in the former Habsburg regional capital of Czernowitz (now Ukraine).</p>]]>
</Biography>
<Expertise>
<![CDATA[<p>Modern Europe, especially Germany and Central/East Central Europe in the nineteenth and twentieth centuries; European Jewish and women''s history, East European and German film and literature, socialism, war, and revolution.</p>]]>
</Expertise>
<Image>http://www.kenyon.edu/images/directory/ablovatski.jpg</Image>
<Link>http://www.kenyon.edu/directories/campus-directory/biography/eliza-ablovatski/</Link>
<Books>
<Book>
<Year></Year>
<Details>
<![CDATA[<p><em>Zwischen Pruth und Jordan. Lebenserinnerungen Czernowitzer Juden</em><em> , </em>with Gaby Coldewey and others Köln: Böhlau Verlag, 2003</p>]]>
</Details>
</Book>
<Book>
<Year></Year>
<Details>
<![CDATA[<p><em>Czernowitz ist gewen an alt jiddische Stdt: Überlebende berichten,</em> With Gaby Coldewey and others. First Edition: Czernowitz,Ukraine: distributed by the Heinrich-Böll-Stiftung, 1998 Second Edition: Berlin, 1999 (Third edition: Potsdam, forthcoming 2009)</p>]]>
</Details>
</Book>
</Books>
<Articles>
<Article>
<Year></Year>
<Details>
<![CDATA[<p>"The Central European Revolutions of 1919 and the Myth of Judeo-Bolshevism," <em>European Review of History, Vol. 17/ Issue 3: Cosmopolitanism, Nationalism and the Jews of East Central Europe (2010), 473-489.</em></p>]]>
</Details>
</Article>
<Article>
<Year></Year>
<Details>
<![CDATA[<p>"Between Red Army and White Guard: Women in Budapest, 1918-1919," in <em>Gender and War in Twentieth-Century Eastern Europe,</em> edited by Maria Bucur and Nancy Wingfield Bloomington: Indiana University Press 2006</p>]]>
</Details>
</Article>
<Article>
<Year></Year>
<Details>
<![CDATA[<p>"The Girl with the Titus-head: Women in Revolution in Munich and Budapest, 1919" <em>Nationalities Papers </em>28/3 (September 2000), 541-550</p>]]>
</Details>
</Article>
</Articles>
<Papers>
</Papers>
<Artwork>
</Artwork>
<Websites>
</Websites>
</Person>
<Person>
<FirstName>One</FirstName>
<LastName>More</LastName>
<Biography>Biography: Some interesting facts...</Biography>
<Expertise>Expertise: Some interesting facts...</Expertise>
<Image>somepicture.jpg</Image>
<Link>somelink.com</Link>
<Books>
<Book>
<Year>2001</Year>
<Details>Book1</Details>
</Book>
<Book>
<Year>2002</Year>
<Details>Book2</Details>
</Book>
</Books>
<Articles>
<Article>
<Year>2001</Year>
<Details>Article1</Details>
</Article>
</Articles>
<Papers>
</Papers>
<Artwork>
</Artwork>
<Websites>
</Websites>
</Person>
</People>';
With MyPersonCTE AS
(
SELECTROW_NUMBER() OVER(ORDERBY (SELECTNULL)) AS PersonID
,p.value('FirstName[1]','varchar(max)') AS FirstName
,p.value('LastName[1]','varchar(max)') AS LastName
,p.value('Biography[1]','varchar(max)') AS Biography
,p.value('Expertise[1]','varchar(max)') AS Expertise
,p.value('Image[1]','varchar(max)') AS Image
,p.value('Link[1]','varchar(max)') AS Link
,p.query('Books') AS BookNode
,p.query('Articles') AS ArticleNode
--same for Papers, Artwork...FROM@x.nodes('/People/Person') AS A(p)
)
,MyBooksCTE AS
(
SELECT MyPersonCTE.*
,ROW_NUMBER() OVER(ORDERBY (SELECTNULL)) AS BookID
,x.value('Year[1]','int') AS BookYear
,x.value('Details[1]','varchar(max)') AS BookDetails
FROM MyPersonCTE
CROSS APPLY MyPersonCTE.BookNode.nodes('/Books/Book') A(x)
)
,MyArticlesCTE AS
(
SELECT MyPersonCTE.*
,ROW_NUMBER() OVER(ORDERBY (SELECTNULL)) AS ArticleID
,x.value('Year[1]','int') AS ArticleYear
,x.value('Details[1]','varchar(max)') AS ArticleDetails
FROM MyPersonCTE
CROSS APPLY MyPersonCTE.ArticleNode.nodes('/Articles/Article') A(x)
)
--same for Papers, Artwork...SELECT p.*
,b.BookID
,b.BookYear
,b.BookDetails
,a.ArticleID
,a.ArticleYear
,a.ArticleDetails
INTO #tempAllData
FROM MyPersonCTE AS p
LEFTJOIN MyBooksCTE AS b ON p.PersonID=b.PersonID
LEFTJOIN MyArticlesCTE AS a ON p.PersonID=a.PersonID ;
--#tempAllData is now filled with all data, copied in all combination: much to much--but DISTINCT is your friend--in this case you'd use the PersonID as FK in all related tablesSELECTDISTINCT PersonID,FirstName,LastName,Biography,Expertise --other fieldsFROM #tempAllData;
SELECTDISTINCT PersonID,BookID,BookYear,BookDetails
FROM #tempAllData;
SELECTDISTINCT PersonID,ArticleID,ArticleYear,ArticleDetails
FROM #tempAllData;
DROPTABLE #tempAllData;
The results
Persons:
1 Eliza Ablovatski <p>Eliza Ablovatski joined ...
2One More Biography: Some interesting facts...
Books
110 <p><em>Zwischen Pruth und ...
120 <p><em>Czernowitz ist gewen ...
232001 Book1
242002 Book2
Articles
110 <p>"The Central European ...
120 <p>"Between Red Army and White ...
130 <p>"The Girl with the Titus-head: ...
242001 Article1
But what you really want to achieve is the one, big table
This is only possible with dynamic SQL. Start as above and change the query to the following. It will first find the columnnames automatically and then use a UNION ALL
to force all data into the same structure and finally there's a big, dynamic PIVOT
:
Attention: I added a PARTITION BY PersonID
to the CTEs related ROW_NUMBERs. This is to get IDs starting with 1
for each person
With MyPersonCTE AS
(
SELECTROW_NUMBER() OVER(ORDERBY (SELECTNULL)) AS PersonID
,p.value('FirstName[1]','varchar(max)') AS FirstName
,p.value('LastName[1]','varchar(max)') AS LastName
,p.value('Biography[1]','varchar(max)') AS Biography
,p.value('Expertise[1]','varchar(max)') AS Expertise
,p.value('Image[1]','varchar(max)') AS Image
,p.value('Link[1]','varchar(max)') AS Link
,p.query('Books') AS BookNode
,p.query('Articles') AS ArticleNode
--same for Papers, Artwork...FROM@x.nodes('/People/Person') AS A(p)
)
,MyBooksCTE AS
(
SELECT MyPersonCTE.*
,ROW_NUMBER() OVER(PARTITIONBY PersonID ORDERBY (SELECTNULL)) AS BookID
,x.value('Year[1]','int') AS BookYear
,x.value('Details[1]','varchar(max)') AS BookDetails
FROM MyPersonCTE
CROSS APPLY MyPersonCTE.BookNode.nodes('/Books/Book') A(x)
)
,MyArticlesCTE AS
(
SELECT MyPersonCTE.*
,ROW_NUMBER() OVER(PARTITIONBY PersonID ORDERBY (SELECTNULL)) AS ArticleID
,x.value('Year[1]','int') AS ArticleYear
,x.value('Details[1]','varchar(max)') AS ArticleDetails
FROM MyPersonCTE
CROSS APPLY MyPersonCTE.ArticleNode.nodes('/Articles/Article') A(x)
)
--same for Papers, Artwork...SELECT p.*
,b.BookID
,b.BookYear
,b.BookDetails
,a.ArticleID
,a.ArticleYear
,a.ArticleDetails
INTO #tempAllData
FROM MyPersonCTE AS p
LEFTJOIN MyBooksCTE AS b ON p.PersonID=b.PersonID
LEFTJOIN MyArticlesCTE AS a ON p.PersonID=a.PersonID ;
--#tempAllData is now filled with all data, copied in all combination: much to much--but DISTINCT is your friend--in this case you'd use the PersonID as FK in all related tablesSELECTDISTINCT PersonID,FirstName,LastName,Biography,Expertise --other fieldsINTO #tempPerson
FROM #tempAllData;
SELECTDISTINCT PersonID,BookID,BookYear,BookDetails
INTO #tempBooks
FROM #tempAllData;
SELECTDISTINCT PersonID,ArticleID,ArticleYear,ArticleDetails
INTO #tempArticles
FROM #tempAllData;
DECLARE@columnNamesVARCHAR(MAX)=
STUFF((SELECTDISTINCT',Book_'+CAST(BookID ASVARCHAR(10)) FROM #tempBooks FOR XML PATH('')),1,1,'')
+(SELECTDISTINCT',Article_'+CAST(ArticleID ASVARCHAR(10)) FROM #tempArticles FOR XML PATH(''));
DECLARE@cmdVARCHAR(MAX)='SELECT p.*
FROM
(
SELECT p.*
,''Book_''+CAST(BookID AS VARCHAR(10)) AS ColumnName
,ISNULL(CAST(BookYear AS VARCHAR(4)),'''') + '' '' + BookDetails AS Data
FROM #tempPerson AS p
INNER JOIN #tempBooks AS b ON p.PersonID=b.PersonID
UNION ALL
SELECT p.*
,''Article_''+CAST(ArticleID AS VARCHAR(10)) AS ColumnName
,ISNULL(CAST(ArticleYear AS VARCHAR(4)),'''') + '' '' + ArticleDetails AS Data
FROM #tempPerson AS p
INNER JOIN #tempArticles AS a ON p.PersonID=a.PersonID
) AS tbl
PIVOT
(
MAX(Data) FOR ColumnName IN('+@columnNames+')
) AS p;'EXEC(@cmd);
DROPTABLE #tempArticles
DROPTABLE #tempBooks
DROPTABLE #tempPerson
DROPTABLE #tempAllData;
Post a Comment for "Data Migration From Multilevel Xml To Single Table With Ssis"