Read XML data into hierarchical tables

橙三吉。 提交于 2019-12-24 11:07:25

问题


I have XML data that is used to store quiz results. I need to convert this into two tables, one containing the questions, and the other containing the answers, but crucially, have a relation between them.

Currently this relation only exists in the XML structure (there are no ID values, etc.).

After a day of research and testing out different approaches, I've got as far as extracting the two parts, but cannot figure out how to create the hierarchy:

declare @xml xml = N'<quizresult>
  <question>
    <questionText>Which fire extinguisher is most suitable for a waste paper basket fire?</questionText>
    <answer number="0" value="0" chosen="0" imageURL="">Powder</answer>
    <answer number="1" value="0" chosen="0" imageURL="">Carbon Dioxide (CO2)</answer>
    <answer number="2" value="1" chosen="1" imageURL="">Water (H2O)</answer>
    <answer number="3" value="0" chosen="0" imageURL="">Foam</answer>
    <result>Correct</result>
  </question>
  <question>
    <questionText>What should your immediate action be on hearing a fire alarm?</questionText>
    <answer number="0" value="0" chosen="0" imageURL="">Find all of your colleagues before making a speedy exit together</answer>
    <answer number="1" value="0" chosen="0" imageURL="">Collect all your valuables before making a speedy exit</answer>
    <answer number="2" value="0" chosen="0" imageURL="">Check the weather to see if you need your coat before leaving</answer>
    <answer number="3" value="1" chosen="1" imageURL="">Leave the building by the nearest exit, closing doors behind you if the rooms are empty</answer>
    <result>Correct</result>
  </question>
  <question>
    <questionText>Which is the most suitable extinguisher for a Computer which is on fire?</questionText>
    <answer number="0" value="0" chosen="1" imageURL="">Water (H2O)</answer>
    <answer number="1" value="0" chosen="0" imageURL="">Powder</answer>
    <answer number="2" value="1" chosen="0" imageURL="">Carbon Dioxide (CO2)</answer>
    <result>Incorrect</result>
  </question>
</quizresult>';

-- Get questions only

DECLARE @questions TABLE (questionText nvarchar(max), result nvarchar(50));
INSERT INTO @questions (questionText, result)
SELECT
    n.q.value('(./questionText)[1]', 'nvarchar(max)') AS questionText,
    n.q.value('(./result)[1]', 'nvarchar(50)') AS result
FROM
    @xml.nodes('/quizresult/question') AS n (q);

-- Get answers only

DECLARE @answers TABLE (answer nvarchar(max), number int, val int, chosen bit);
INSERT INTO @answers (answer, number, val, chosen)
SELECT
    n.q.value('.[1]', 'nvarchar(max)') AS answer,
    n.q.value('@number', 'int') AS number,
    n.q.value('@value', 'int') AS val,
    n.q.value('@chosen', 'bit') AS chosen
FROM
    @xml.nodes('/quizresult/question/answer') AS n (q);

Can anyone please enlighten me if it is possible to create IDs/GUID's (or something) to create the parent/child hierarchy that respects the XML file? I should add, in actual fact this is an XML column and the data will be transformed en-masse. I'm just using a variable until I figure out the basic approach.


回答1:


We can (ab)use ROW_NUMBER() to generate the IDs outside XQuery. Preamble:

WITH questions AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS ID,
        n.q.value('(./questionText)[1]', 'nvarchar(max)') AS questionText,
        n.q.value('(./result)[1]', 'nvarchar(50)') AS result,
        n.q.query('answer') AS answers
    FROM
        @xml.nodes('/quizresult/question') AS n (q)
), questions_and_answers AS (
    SELECT ID, questionText, result, answer.query('.') AS answer
    FROM questions
    CROSS APPLY answers.nodes('answer') AS a(answer)
)

And now retrieve the questions with

SELECT ID, questionText, result 
FROM questions

And the answers with

SELECT ID AS questionID,
    q.answer.value('answer[1]', 'nvarchar(max)') AS answer,
    q.answer.value('answer[1]/@number', 'int') AS number,
    q.answer.value('answer[1]/@value', 'int') AS val,
    q.answer.value('answer[1]/@chosen', 'bit') AS chosen
FROM questions_and_answers AS q



回答2:


By using MERGE to insert your questions, you can capture fields from both the source and target data. So you can then access both the newly inserted question ID, and the corresponding answers and remove the need to rely on question text to link questions to answers.

You just need a mapping table to store the intermediate results after inserting questions.

DECLARE @Mapping TABLE (QuestionID INT NOT NULL, Answers XML);

MERGE @questions AS q
USING
(   SELECT  questionText = q.x.value('questionText[1]', 'NVARCHAR(MAX)'),
            result = q.x.value('result[1]', 'NVARCHAR(MAX)'),
            Answers = q.x.query('answer')
    FROM    @xml.nodes('quizresult/question') q (x)
) AS t
    ON 1 = 0 
WHEN NOT MATCHED THEN 
    INSERT (QuestionText, Result)
    VALUES (t.QuestionText, t.Result)
OUTPUT inserted.QuestionID, t.Answers INTO @Mapping (QuestionID, Answers);

Then having stored the intermediate results, including the question ID, you can query the mapping table to insert the answers.

-- INSERT ANSWERS USING MAPPING TABLE
INSERT @Answers (QuestionID, Answer, Number, Val, Chosen)
SELECT  m.QuestionID,
        answer = a.x.value('text()[1]', 'NVARCHAR(MAX)'),
        number = a.x.value('@number[1]', 'INT'),
        val = a.x.value('@value[1]', 'INT'),
        chosen = a.x.value('@chosen[1]', 'BIT')
FROM    @Mapping m
        CROSS APPLY Answers.nodes('answer') a (x);

Full working Demo

DECLARE @xml XML = N'<quizresult>
  <question>
    <questionText>Which fire extinguisher is most suitable for a waste paper basket fire?</questionText>
    <answer number="0" value="0" chosen="0" imageURL="">Powder</answer>
    <answer number="1" value="0" chosen="0" imageURL="">Carbon Dioxide (CO2)</answer>
    <answer number="2" value="1" chosen="1" imageURL="">Water (H2O)</answer>
    <answer number="3" value="0" chosen="0" imageURL="">Foam</answer>
    <result>Correct</result>
  </question>
  <question>
    <questionText>What should your immediate action be on hearing a fire alarm?</questionText>
    <answer number="0" value="0" chosen="0" imageURL="">Find all of your colleagues before making a speedy exit together</answer>
    <answer number="1" value="0" chosen="0" imageURL="">Collect all your valuables before making a speedy exit</answer>
    <answer number="2" value="0" chosen="0" imageURL="">Check the weather to see if you need your coat before leaving</answer>
    <answer number="3" value="1" chosen="1" imageURL="">Leave the building by the nearest exit, closing doors behind you if the rooms are empty</answer>
    <result>Correct</result>
  </question>
  <question>
    <questionText>Which is the most suitable extinguisher for a Computer which is on fire?</questionText>
    <answer number="0" value="0" chosen="1" imageURL="">Water (H2O)</answer>
    <answer number="1" value="0" chosen="0" imageURL="">Powder</answer>
    <answer number="2" value="1" chosen="0" imageURL="">Carbon Dioxide (CO2)</answer>
    <result>Incorrect</result>
  </question>
  <question>
    <questionText>Which is the most suitable extinguisher for a Computer which is on fire?</questionText>
    <answer number="0" value="0" chosen="1" imageURL="">Water (H2O) DUPLICATE</answer>
    <answer number="1" value="0" chosen="0" imageURL="">Powder DUPLICATE</answer>
    <answer number="2" value="1" chosen="0" imageURL="">Carbon Dioxide (CO2) DUPLICATE</answer>
    <result>Incorrect</result>
  </question>
</quizresult>';

-- DEMO TARGE TABLES
DECLARE @questions TABLE 
(
    QuestionID INT IDENTITY(1, 1) NOT NULL,
    questionText NVARCHAR(MAX), 
    result NVARCHAR(50)
);
DECLARE @answers TABLE 
(
    AnswerID INT IDENTITY(1, 1) NOT NULL,
    QuestionID INT NOT NULL,
    answer NVARCHAR(MAX), 
    number INT, 
    val INT, 
    chosen BIT
);



-- MAPPING TABLE
DECLARE @Mapping TABLE (QuestionID INT NOT NULL, Answers XML);

-- INSERT ANSWERS
MERGE @questions AS q
USING
(   SELECT  questionText = q.x.value('questionText[1]', 'NVARCHAR(MAX)'),
            result = q.x.value('result[1]', 'NVARCHAR(MAX)'),
            Answers = q.x.query('answer')
    FROM    @xml.nodes('quizresult/question') q (x)
) AS t
    ON 1 = 0 
WHEN NOT MATCHED THEN 
    INSERT (QuestionText, Result)
    VALUES (t.QuestionText, t.Result)
OUTPUT inserted.QuestionID, t.Answers INTO @Mapping (QuestionID, Answers);

-- INSERT ANSWERS USING MAPPING TABLE
INSERT @Answers (QuestionID, Answer, Number, Val, Chosen)
SELECT  m.QuestionID,
        answer = a.x.value('text()[1]', 'NVARCHAR(MAX)'),
        number = a.x.value('@number[1]', 'INT'),
        val = a.x.value('@value[1]', 'INT'),
        chosen = a.x.value('@chosen[1]', 'BIT')
FROM    @Mapping m
        CROSS APPLY Answers.nodes('answer') a (x);

-- CHECK RESULTS
SELECT  *
FROM    @Questions AS q
        INNER JOIN @Answers AS a
            ON a.QuestionID = q.QuestionID;



回答3:


The only relationship here is with questionText. Hence this column can be fetched like this

DECLARE @answers TABLE (questionText nvarchar(max),answer nvarchar(max), 
number int, val int, chosen bit);

INSERT INTO @answers (questionText, answer, number, val, chosen)
SELECT
n.q.value('(../questionText)[1]', 'nvarchar(max)') as questionText,
n.q.value('.[1]', 'nvarchar(max)') AS answer,
n.q.value('@number', 'int') AS number,
n.q.value('@value', 'int') AS val,
n.q.value('@chosen', 'bit') AS chosen
FROM
@xml.nodes('/quizresult/question/answer') AS n (q);

Alternatively, you can have ID generated based on questionText

DECLARE @questions TABLE (Id int, questionText nvarchar(max), result nvarchar(50));

INSERT INTO @questions (id, questionText, result)
SELECT
    Rank() over(order by n.q.value('(./questionText)[1]', 'nvarchar(max)')) as Id,
    n.q.value('(./questionText)[1]', 'nvarchar(max)') AS questionText,
    n.q.value('(./result)[1]', 'nvarchar(50)') AS result
FROM
    @xml.nodes('/quizresult/question') AS n (q);

DECLARE @answers TABLE (Id int, questionText nvarchar(max),answer 
nvarchar(max), number int, val int, chosen bit);

INSERT INTO @answers (Id, questionText, answer, number, val, chosen)
SELECT
   Dense_rank() over(order by n.q.value('(../questionText)[1]', 'nvarchar(max)')) as Id,
   n.q.value('(../questionText)[1]', 'nvarchar(max)') as questionText,
   n.q.value('.[1]', 'nvarchar(max)') AS answer,
   n.q.value('@number', 'int') AS number,
   n.q.value('@value', 'int') AS val,
   n.q.value('@chosen', 'bit') AS chosen
FROM
   @xml.nodes('/quizresult/question/answer') AS n (q);

Other solution would be if same question/answer could repeat

DECLARE @questions TABLE (Id int identity(1,1), questionText nvarchar(max), result nvarchar(50), answer xml);
INSERT INTO @questions ( questionText, result, answer)
SELECT
n.q.value('(./questionText)[1]', 'nvarchar(max)') AS questionText,
n.q.value('(./result)[1]', 'nvarchar(50)') AS result,
 n.q.query('answer') AS answer
FROM
@xml.nodes('/quizresult/question') AS n (q);

DECLARE @answers TABLE (Id int, questionText nvarchar(max),answer nvarchar(max), number int, val int, chosen bit);
INSERT INTO @answers (Id, questionText, answer, number, val, chosen)
SELECT
q.Id as Id,
q.questionText as questionText,
n.q.value('.[1]', 'nvarchar(max)') AS answer,
n.q.value('@number', 'int') AS number,
n.q.value('@value', 'int') AS val,
n.q.value('@chosen', 'bit') AS chosen
FROM
@questions q
 outer apply q.answer.nodes('answer') as n(q)

select * from @questions
select * from @answers


来源:https://stackoverflow.com/questions/45883457/read-xml-data-into-hierarchical-tables

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!