Skip to content Skip to sidebar Skip to footer

Data Migration From Multilevel Xml To Single Table With Ssis

Goal: I am attempting to migrate data from a multi-layered XML file with nested elements to a single table. System parameters: MS SQL Server Management Studio, Microsoft Visual Studio …

Solution 1:

Thx for the actual XML! The following query will get your values out of the XML. It will generate IDs for them to store all data in related tables.

Attention: I had to double the apostrophe (') in "women's" so that it survives inside the T-SQL string literal, and I added a second person to show the approach:

-- Sample input for the queries below: an XML document with two <Person>
-- elements, each carrying scalar fields (FirstName, LastName, Biography,
-- Expertise, Image, Link) plus nested <Books>/<Book> and <Articles>/<Article>
-- lists. The first person has empty <Year> elements; the second has filled ones.
-- The apostrophe in "women''s" is doubled because it sits inside a T-SQL
-- string literal.
-- NOTE(review): the literal is non-Unicode (no N prefix); presumably this is
-- needed because the prolog declares encoding="UTF-8" -- confirm before changing.
DECLARE@x XML='<?xml version="1.0" encoding="UTF-8" ?>
<People>
    <Person>
        <FirstName>Eliza</FirstName>
        <LastName>Ablovatski</LastName>
        <Biography>
            <![CDATA[<p>Eliza Ablovatski joined the Kenyon history department in 2003, after graduate work in East Central European history at Columbia University and research and fellowships in Munich and Berlin, Germany and Budapest, Hungary. She teaches classes on Europe from 1500 to the present, focusing on the nineteenth and twentieth centuries, Germany, Russia, the Habsburg Monarchy, film, nationalism and identity, gender, race, and the interwar period.</p>
<p>Her dissertation and first book,&nbsp;<em>Revolution and Political Violence in Central Europe: The Deluge of 1919</em> (forthcoming from Cambridge University Press), focus on the revolutionary upheavals in Munich and Budapest following the First World War, and their relationship to political violence and antisemitism. She is currently researching the occupation of Austria (1945-1955) at the end of the Second World War, and the nuclear idea in postwar Europe. She has also researched and written extensively on the history of Jews in the former Habsburg regional capital of Czernowitz (now Ukraine).</p>]]>
        </Biography>
        <Expertise>
            <![CDATA[<p>Modern Europe, especially Germany and Central/East Central Europe in the nineteenth and twentieth centuries; European Jewish and women''s history, East European and German film and literature, socialism, war, and revolution.</p>]]>
        </Expertise>
        <Image>http://www.kenyon.edu/images/directory/ablovatski.jpg</Image>
        <Link>http://www.kenyon.edu/directories/campus-directory/biography/eliza-ablovatski/</Link>
        <Books>
            <Book>
                <Year></Year>
                <Details>
                    <![CDATA[<p><em>Zwischen Pruth und Jordan. Lebenserinnerungen Czernowitzer Juden</em><em>&nbsp;,&nbsp;</em>with Gaby Coldewey and others K&ouml;ln: B&ouml;hlau Verlag, 2003</p>]]>
                </Details>
            </Book>
            <Book>
                <Year></Year>
                <Details>
                    <![CDATA[<p><em>Czernowitz ist gewen an alt jiddische Stdt: &Uuml;berlebende berichten,</em>&nbsp;With Gaby Coldewey and others. First Edition: Czernowitz,Ukraine: distributed by the Heinrich-B&ouml;ll-Stiftung, 1998 Second Edition: Berlin, 1999 (Third edition: Potsdam, forthcoming 2009)</p>]]>
                </Details>
            </Book>
        </Books>
        <Articles>
            <Article>
                <Year></Year>
                <Details>
                    <![CDATA[<p>"The Central European Revolutions of 1919 and the Myth of Judeo-Bolshevism,"&nbsp;<em>European Review of History, Vol. 17/ Issue 3: Cosmopolitanism, Nationalism and the Jews of East Central Europe (2010), 473-489.</em></p>]]>
                </Details>
            </Article>
            <Article>
                <Year></Year>
                <Details>
                    <![CDATA[<p>"Between Red Army and White Guard: Women in Budapest, 1918-1919," in&nbsp;<em>Gender and War in Twentieth-Century Eastern Europe,</em>&nbsp;edited by Maria Bucur and Nancy Wingfield&nbsp;Bloomington: Indiana University Press 2006</p>]]>
                </Details>
            </Article>
            <Article>
                <Year></Year>
                <Details>
                    <![CDATA[<p>"The Girl with the Titus-head: Women in Revolution in Munich and Budapest, 1919"&nbsp;<em>Nationalities Papers&nbsp;</em>28/3 (September 2000), 541-550</p>]]>
                </Details>
            </Article>
        </Articles>
        <Papers>
        </Papers>
        <Artwork>
        </Artwork>
        <Websites>
        </Websites>
    </Person>
    <Person>
        <FirstName>One</FirstName>
        <LastName>More</LastName>
        <Biography>Biography: Some interesting facts...</Biography>
        <Expertise>Expertise: Some interesting facts...</Expertise>
        <Image>somepicture.jpg</Image>
        <Link>somelink.com</Link>
        <Books>
            <Book>
                <Year>2001</Year>
                <Details>Book1</Details>
            </Book>
            <Book>
                <Year>2002</Year>
                <Details>Book2</Details>
            </Book>
        </Books>
        <Articles>
            <Article>
                <Year>2001</Year>
                <Details>Article1</Details>
            </Article>
        </Articles>
        <Papers>
        </Papers>
        <Artwork>
        </Artwork>
        <Websites>
        </Websites>
    </Person>
</People>';

-- Shreds @x into one row per <Person>, expands the nested <Book> and
-- <Article> elements, and stores the combined result in #tempAllData.
-- Book and article IDs are numbered across all persons (no PARTITION BY).
WITH MyPersonCTE AS
(
    -- One row per /People/Person. PersonID is a generated surrogate key;
    -- ORDER BY (SELECT NULL) only satisfies the syntax and imposes no order.
    -- The Books/Articles subtrees are kept as XML fragments so the following
    -- CTEs can shred them per person.
    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS PersonID
          ,p.value('FirstName[1]', 'varchar(max)') AS FirstName
          ,p.value('LastName[1]', 'varchar(max)') AS LastName
          ,p.value('Biography[1]', 'varchar(max)') AS Biography
          ,p.value('Expertise[1]', 'varchar(max)') AS Expertise
          ,p.value('Image[1]', 'varchar(max)') AS Image
          ,p.value('Link[1]', 'varchar(max)') AS Link
          ,p.query('Books') AS BookNode
          ,p.query('Articles') AS ArticleNode
          -- same for Papers, Artwork...
    FROM @x.nodes('/People/Person') AS A(p)
)
,MyBooksCTE AS
(
    -- One row per book, carrying all person columns along.
    SELECT MyPersonCTE.*
          ,ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS BookID
          ,x.value('Year[1]', 'int') AS BookYear
          ,x.value('Details[1]', 'varchar(max)') AS BookDetails
    FROM MyPersonCTE
    CROSS APPLY MyPersonCTE.BookNode.nodes('/Books/Book') AS A(x)
)
,MyArticlesCTE AS
(
    -- One row per article, carrying all person columns along.
    SELECT MyPersonCTE.*
          ,ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS ArticleID
          ,x.value('Year[1]', 'int') AS ArticleYear
          ,x.value('Details[1]', 'varchar(max)') AS ArticleDetails
    FROM MyPersonCTE
    CROSS APPLY MyPersonCTE.ArticleNode.nodes('/Articles/Article') AS A(x)
)
-- same for Papers, Artwork...
-- Books and articles are joined to the person independently, so each person
-- appears once per book/article combination; LEFT JOIN keeps persons that
-- have no books or no articles.
SELECT p.*
      ,b.BookID
      ,b.BookYear
      ,b.BookDetails
      ,a.ArticleID
      ,a.ArticleYear
      ,a.ArticleDetails
INTO #tempAllData
FROM MyPersonCTE AS p
LEFT JOIN MyBooksCTE AS b ON p.PersonID = b.PersonID
LEFT JOIN MyArticlesCTE AS a ON p.PersonID = a.PersonID;

-- #tempAllData is now filled with all data, copied in every combination:
-- far too much -- but DISTINCT is your friend.
-- In this case you'd use the PersonID as FK in all related tables.

-- Persons
SELECT DISTINCT PersonID, FirstName, LastName, Biography, Expertise -- other fields
FROM #tempAllData;

-- Books per person
SELECT DISTINCT PersonID, BookID, BookYear, BookDetails
FROM #tempAllData;

-- Articles per person
SELECT DISTINCT PersonID, ArticleID, ArticleYear, ArticleDetails
FROM #tempAllData;

DROP TABLE #tempAllData;

The results

Persons:

1   Eliza    Ablovatski     <p>Eliza Ablovatski joined ...
2   One      More           Biography: Some interesting facts...

Books

1   1   0       <p><em>Zwischen Pruth und ...
1   2   0       <p><em>Czernowitz ist gewen ...
2   3   2001    Book1
2   4   2002    Book2

Articles

1   1   0       <p>"The Central European ...
1   2   0       <p>"Between Red Army and White ...
1   3   0       <p>"The Girl with the Titus-head: ...
2   4   2001    Article1

But what you really want to achieve is the one, big table

This is only possible with dynamic SQL. Start as above and change the query to the following. It will first find the column names automatically, then use a UNION ALL to force all data into the same structure, and finally apply a big, dynamic PIVOT:

Attention: I added a PARTITION BY PersonID to the ROW_NUMBER calls in the book and article CTEs. This makes the IDs start at 1 for each person.

-- Shreds @x into one row per <Person>, expands the nested <Book> and
-- <Article> elements, and stores the combined result in #tempAllData.
-- Book and article IDs restart at 1 for every person (PARTITION BY PersonID),
-- so they can later serve as stable pivot column suffixes (Book_1, Article_1, ...).
WITH MyPersonCTE AS
(
    -- One row per /People/Person. PersonID is a generated surrogate key;
    -- ORDER BY (SELECT NULL) only satisfies the syntax and imposes no order.
    -- The Books/Articles subtrees are kept as XML fragments so the following
    -- CTEs can shred them per person.
    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS PersonID
          ,p.value('FirstName[1]', 'varchar(max)') AS FirstName
          ,p.value('LastName[1]', 'varchar(max)') AS LastName
          ,p.value('Biography[1]', 'varchar(max)') AS Biography
          ,p.value('Expertise[1]', 'varchar(max)') AS Expertise
          ,p.value('Image[1]', 'varchar(max)') AS Image
          ,p.value('Link[1]', 'varchar(max)') AS Link
          ,p.query('Books') AS BookNode
          ,p.query('Articles') AS ArticleNode
          -- same for Papers, Artwork...
    FROM @x.nodes('/People/Person') AS A(p)
)
,MyBooksCTE AS
(
    -- One row per book; BookID counts from 1 within each person.
    SELECT MyPersonCTE.*
          ,ROW_NUMBER() OVER (PARTITION BY MyPersonCTE.PersonID ORDER BY (SELECT NULL)) AS BookID
          ,x.value('Year[1]', 'int') AS BookYear
          ,x.value('Details[1]', 'varchar(max)') AS BookDetails
    FROM MyPersonCTE
    CROSS APPLY MyPersonCTE.BookNode.nodes('/Books/Book') AS A(x)
)
,MyArticlesCTE AS
(
    -- One row per article; ArticleID counts from 1 within each person.
    SELECT MyPersonCTE.*
          ,ROW_NUMBER() OVER (PARTITION BY MyPersonCTE.PersonID ORDER BY (SELECT NULL)) AS ArticleID
          ,x.value('Year[1]', 'int') AS ArticleYear
          ,x.value('Details[1]', 'varchar(max)') AS ArticleDetails
    FROM MyPersonCTE
    CROSS APPLY MyPersonCTE.ArticleNode.nodes('/Articles/Article') AS A(x)
)
-- same for Papers, Artwork...
-- Books and articles are joined to the person independently, so each person
-- appears once per book/article combination; LEFT JOIN keeps persons that
-- have no books or no articles.
SELECT p.*
      ,b.BookID
      ,b.BookYear
      ,b.BookDetails
      ,a.ArticleID
      ,a.ArticleYear
      ,a.ArticleDetails
INTO #tempAllData
FROM MyPersonCTE AS p
LEFT JOIN MyBooksCTE AS b ON p.PersonID = b.PersonID
LEFT JOIN MyArticlesCTE AS a ON p.PersonID = a.PersonID;

-- #tempAllData is now filled with all data, copied in every combination:
-- far too much -- but DISTINCT is your friend.
-- In this case you'd use the PersonID as FK in all related tables.

-- Persons (one row per person)
SELECT DISTINCT PersonID, FirstName, LastName, Biography, Expertise -- other fields
INTO #tempPerson
FROM #tempAllData;

-- Books per person
SELECT DISTINCT PersonID, BookID, BookYear, BookDetails
INTO #tempBooks
FROM #tempAllData;

-- Articles per person
SELECT DISTINCT PersonID, ArticleID, ArticleYear, ArticleDetails
INTO #tempArticles
FROM #tempAllData;

-- Build the pivot column list, e.g. [Book_1],[Book_2],[Article_1],...
-- Each sub-select concatenates its distinct names via FOR XML PATH('') with a
-- leading comma per name. ISNULL guards each part so that a missing book or
-- article list does not turn the whole concatenation into NULL, and STUFF
-- removes the single leading comma of the combined string.
-- QUOTENAME brackets every generated name so it is a safe identifier inside
-- the dynamic statement.
DECLARE @columnNames VARCHAR(MAX) =
    STUFF(
        ISNULL((SELECT DISTINCT ',' + QUOTENAME('Book_' + CAST(BookID AS VARCHAR(10)))
                FROM #tempBooks
                FOR XML PATH('')), '')
      + ISNULL((SELECT DISTINCT ',' + QUOTENAME('Article_' + CAST(ArticleID AS VARCHAR(10)))
                FROM #tempArticles
                FOR XML PATH('')), '')
    , 1, 1, '');

-- Dynamic statement: UNION ALL forces books and articles into the same
-- (person columns, ColumnName, Data) shape; PIVOT then spreads Data over one
-- column per generated name, yielding one wide row per person.
DECLARE @cmd VARCHAR(MAX) = 'SELECT p.*
FROM
(
    SELECT p.*
          ,''Book_''+CAST(BookID AS VARCHAR(10)) AS ColumnName
          ,ISNULL(CAST(BookYear AS VARCHAR(4)),'''') + '' '' + BookDetails AS Data
    FROM #tempPerson AS p
    INNER JOIN #tempBooks AS b ON p.PersonID=b.PersonID
    UNION ALL
    SELECT p.*
          ,''Article_''+CAST(ArticleID AS VARCHAR(10)) AS ColumnName
          ,ISNULL(CAST(ArticleYear AS VARCHAR(4)),'''') + '' '' + ArticleDetails AS Data
    FROM #tempPerson AS p
    INNER JOIN #tempArticles AS a ON p.PersonID=a.PersonID
) AS tbl
PIVOT
(
    MAX(Data) FOR ColumnName IN(' + @columnNames + ')
) AS p;';

-- With no book and no article at all there is nothing to pivot (@columnNames
-- and therefore @cmd are NULL); return the plain person rows instead.
IF @columnNames IS NULL
    SELECT p.* FROM #tempPerson AS p;
ELSE
    EXEC(@cmd);

-- Clean up the temporary tables created by the statements above.
DROP TABLE #tempArticles;
DROP TABLE #tempBooks;
DROP TABLE #tempPerson;
DROP TABLE #tempAllData;

Post a Comment for "Data Migration From Multilevel Xml To Single Table With Ssis"