问题
I wonder if there is an easy way to read Excel 2010 XML?
This XML has a different structure than I used to read.
Especially ss:index attribute (ss:Index**="7") makes things a bit more complicated
EDIT: To explain better:
- I have file with XML extension which can be easily opened in Excel
- I'm looking a way to read this sheet programically (eg copy to DataTable)
- Noticed this is not common XML I used to work
- XML defines fields on the begining, than use ROW, CELL and DATA tags
- What surprised me is when there are eg 3 fields (cells) but 2nd field doesn't have value this field is 'skipped' in XML, but 3rd field has some additional 'index' attribute eg: ss:Index**="3" (which indicates that even it is on 2nd position right index should be "3"
fragment of example XML
<Row ss:AutoFitHeight="0">
<Cell><Data ss:Type="String">Johny</Data></Cell>
<Cell ss:Index="3"><Data ss:Type="String">NY</Data></Cell>
回答1:
OK I've finally found solution from here: http://www.codeproject.com/Articles/32370/Import-Excel-File-to-DataSet#xx
Below sample code little adopted to my needs.
public static class XMLtoDataTable {
private static ColumnType getDefaultType() {
return new ColumnType(typeof(String));
}
struct ColumnType {
public Type type;
private string name;
public ColumnType(Type type) { this.type = type; this.name = type.ToString().ToLower(); }
public object ParseString(string input) {
if (String.IsNullOrEmpty(input))
return DBNull.Value;
switch (type.ToString()) {
case "system.datetime":
return DateTime.Parse(input);
case "system.decimal":
return decimal.Parse(input);
case "system.boolean":
return bool.Parse(input);
default:
return input;
}
}
}
private static ColumnType getType(XmlNode data) {
string type = null;
if (data.Attributes["ss:Type"] == null || data.Attributes["ss:Type"].Value == null)
type = "";
else
type = data.Attributes["ss:Type"].Value;
switch (type) {
case "DateTime":
return new ColumnType(typeof(DateTime));
case "Boolean":
return new ColumnType(typeof(Boolean));
case "Number":
return new ColumnType(typeof(Decimal));
case "":
decimal test2;
if (data == null || String.IsNullOrEmpty(data.InnerText) || decimal.TryParse(data.InnerText, out test2)) {
return new ColumnType(typeof(Decimal));
} else {
return new ColumnType(typeof(String));
}
default://"String"
return new ColumnType(typeof(String));
}
}
public static DataSet ImportExcelXML (string fileName, bool hasHeaders, bool autoDetectColumnType) {
StreamReader sr = new StreamReader( fileName);
Stream st = (Stream) sr.BaseStream;
return ImportExcelXML( st, hasHeaders, autoDetectColumnType);
}
private static DataSet ImportExcelXML(Stream inputFileStream, bool hasHeaders, bool autoDetectColumnType) {
XmlDocument doc = new XmlDocument();
doc.Load(new XmlTextReader(inputFileStream));
XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);
nsmgr.AddNamespace("o", "urn:schemas-microsoft-com:office:office");
nsmgr.AddNamespace("x", "urn:schemas-microsoft-com:office:excel");
nsmgr.AddNamespace("ss", "urn:schemas-microsoft-com:office:spreadsheet");
DataSet ds = new DataSet();
foreach (XmlNode node in
doc.DocumentElement.SelectNodes("//ss:Worksheet", nsmgr)) {
DataTable dt = new DataTable(node.Attributes["ss:Name"].Value);
ds.Tables.Add(dt);
XmlNodeList rows = node.SelectNodes("ss:Table/ss:Row", nsmgr);
if (rows.Count > 0) {
//*************************
//Add Columns To Table from header row
//*************************
List<ColumnType> columns = new List<ColumnType>();
int startIndex = 0;
if (hasHeaders) {
foreach (XmlNode data in rows[0].SelectNodes("ss:Cell/ss:Data", nsmgr)) {
columns.Add(new ColumnType(typeof(string)));//default to text
dt.Columns.Add(data.InnerText, typeof(string));
}
startIndex++;
}
//*************************
//Update Data-Types of columns if Auto-Detecting
//*************************
if (autoDetectColumnType && rows.Count > 0) {
XmlNodeList cells = rows[startIndex].SelectNodes("ss:Cell", nsmgr);
int actualCellIndex = 0;
for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++) {
XmlNode cell = cells[cellIndex];
if (cell.Attributes["ss:Index"] != null)
actualCellIndex =
int.Parse(cell.Attributes["ss:Index"].Value) - 1;
ColumnType autoDetectType =
getType(cell.SelectSingleNode("ss:Data", nsmgr));
if (actualCellIndex >= dt.Columns.Count) {
dt.Columns.Add("Column" +
actualCellIndex.ToString(), autoDetectType.type);
columns.Add(autoDetectType);
} else {
dt.Columns[actualCellIndex].DataType = autoDetectType.type;
columns[actualCellIndex] = autoDetectType;
}
actualCellIndex++;
}
}
//*************************
//Load Data
//*************************
for (int i = startIndex; i < rows.Count; i++) {
DataRow row = dt.NewRow();
XmlNodeList cells = rows[i].SelectNodes("ss:Cell", nsmgr);
int actualCellIndex = 0;
for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++) {
XmlNode cell = cells[cellIndex];
if (cell.Attributes["ss:Index"] != null)
actualCellIndex = int.Parse(cell.Attributes["ss:Index"].Value) - 1;
XmlNode data = cell.SelectSingleNode("ss:Data", nsmgr);
if (actualCellIndex >= dt.Columns.Count) {
for (int ii = dt.Columns.Count; ii < actualCellIndex; ii++) {
dt.Columns.Add("Column" + actualCellIndex.ToString(), typeof(string));columns.Add(getDefaultType());
} // ii
ColumnType autoDetectType =
getType(cell.SelectSingleNode("ss:Data", nsmgr));
dt.Columns.Add("Column" + actualCellIndex.ToString(),
typeof(string));
columns.Add(autoDetectType);
}
if (data != null)
row[actualCellIndex] = data.InnerText;
actualCellIndex++;
}
dt.Rows.Add(row);
}
}
}
return ds;
}
}
回答2:
You can try with this code
var textReader = new XmlTextReader("...\\YourFile.xml");
// Read until end of file
while (textReader.Read())
{
XmlNodeType nType = textReader.NodeType;
// If node type us a declaration
if (nType == XmlNodeType.XmlDeclaration)
{
Console.WriteLine("Declaration:" + textReader.Name.ToString());
}
// if node type is a comment
if (nType == XmlNodeType.Comment)
{
Console.WriteLine("Comment:" + textReader.Name.ToString());
}
// if node type us an attribute
if (nType == XmlNodeType.Attribute)
{
Console.WriteLine("Attribute:" + textReader.Name.ToString());
}
// if node type is an element
if (nType == XmlNodeType.Element)
{
Console.WriteLine("Element:" + textReader.Name.ToString());
}
// if node type is an entity\
if (nType == XmlNodeType.Entity)
{
Console.WriteLine("Entity:" + textReader.Name.ToString());
}
// if node type is a Process Instruction
if (nType == XmlNodeType.Entity)
{
Console.WriteLine("Entity:" + textReader.Name.ToString());
}
// if node type a document
if (nType == XmlNodeType.DocumentType)
{
Console.WriteLine("Document:" + textReader.Name.ToString());
}
// if node type is white space
if (nType == XmlNodeType.Whitespace)
{
Console.WriteLine("WhiteSpace:" + textReader.Name.ToString());
}
}
回答3:
Even without coulmn type its working fine too. thanks !!!
have sample for without coulmn type:
private static DataSet ImportExcelXML(Stream inputFileStream, bool hasHeaders)
{
XmlDocument doc = new XmlDocument();
doc.Load(new XmlTextReader(inputFileStream));
XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);
nsmgr.AddNamespace("o", "urn:schemas-microsoft-com:office:office");
nsmgr.AddNamespace("x", "urn:schemas-microsoft-com:office:excel");
nsmgr.AddNamespace("ss", "urn:schemas-microsoft-com:office:spreadsheet");
DataSet ds = new DataSet();
foreach (XmlNode node in
doc.DocumentElement.SelectNodes("//ss:Worksheet", nsmgr))
{
DataTable dt = new DataTable(node.Attributes["ss:Name"].Value);
ds.Tables.Add(dt);
XmlNodeList rows = node.SelectNodes("ss:Table/ss:Row", nsmgr);
if (rows.Count > 0)
{
//*************************
//Add Columns To Table from header row
//*************************
List<ColumnType> columns = new List<ColumnType>();
int startIndex = 0;
if (hasHeaders)
{
foreach (XmlNode data in rows[0].SelectNodes("ss:Cell/ss:Data", nsmgr))
{
columns.Add(new ColumnType(typeof(string)));//default to text
dt.Columns.Add(data.InnerText, typeof(string));
}
startIndex++;
}
//*************************
//Update Data-Types of columns if Auto-Detecting
//*************************
if (rows.Count > 0)
{
XmlNodeList cells = rows[startIndex].SelectNodes("ss:Cell", nsmgr);
int actualCellIndex = 0;
for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
{
XmlNode cell = cells[cellIndex];
if (cell.Attributes["ss:Index"] != null)
actualCellIndex =
int.Parse(cell.Attributes["ss:Index"].Value) - 1;
//ColumnType autoDetectType =
// getType(cell.SelectSingleNode("ss:Data", nsmgr));
if (actualCellIndex >= dt.Columns.Count)
{
//dt.Columns.Add("Column" +
// actualCellIndex.ToString(), autoDetectType.type);
dt.Columns.Add("Column" + actualCellIndex.ToString());
//columns.Add(autoDetectType);
}
else
{
//dt.Columns[actualCellIndex].DataType = autoDetectType.type;
//columns[actualCellIndex] = autoDetectType;
}
actualCellIndex++;
}
}
//*************************
//Load Data
//*************************
for (int i = startIndex; i < rows.Count; i++)
{
DataRow row = dt.NewRow();
XmlNodeList cells = rows[i].SelectNodes("ss:Cell", nsmgr);
int actualCellIndex = 0;
for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
{
XmlNode cell = cells[cellIndex];
if (cell.Attributes["ss:Index"] != null)
actualCellIndex = int.Parse(cell.Attributes["ss:Index"].Value) - 1;
XmlNode data = cell.SelectSingleNode("ss:Data", nsmgr);
if (actualCellIndex >= dt.Columns.Count)
{
for (int ii = dt.Columns.Count; ii < actualCellIndex; ii++)
{
//dt.Columns.Add("Column" + actualCellIndex.ToString(), typeof(string)); columns.Add(getDefaultType());
dt.Columns.Add("Column" + actualCellIndex.ToString());
//columns.Add(getDefaultType());
} // ii
//ColumnType autoDetectType = getType(cell.SelectSingleNode("ss:Data", nsmgr));
dt.Columns.Add("Column" + actualCellIndex.ToString(),
typeof(string));
// columns.Add(autoDetectType);
}
if (data != null)
row[actualCellIndex] = data.InnerText;
actualCellIndex++;
}
dt.Rows.Add(row);
}
}
}
return ds;
}
回答4:
public static class XMLtoDataTable
{
private static ColumnType getDefaultType()
{
return new ColumnType(typeof(String));
}
struct ColumnType
{
public Type type;
private string name;
public ColumnType(Type type) { this.type = type; this.name = type.ToString().ToLower(); }
public object ParseString(string input)
{
if (String.IsNullOrEmpty(input))
return DBNull.Value;
switch (type.ToString())
{
case "system.datetime":
return DateTime.Parse(input);
case "system.decimal":
return decimal.Parse(input);
case "system.boolean":
return bool.Parse(input);
default:
return input;
}
}
}
private static ColumnType getType(XmlNode data)
{
string type = null;
if (data.Attributes["ss:Type"] == null || data.Attributes["ss:Type"].Value == null)
type = "";
else
type = data.Attributes["ss:Type"].Value;
switch (type)
{
case "DateTime":
return new ColumnType(typeof(DateTime));
case "Boolean":
return new ColumnType(typeof(Boolean));
case "Number":
return new ColumnType(typeof(Decimal));
case "":
decimal test2;
if (data == null || String.IsNullOrEmpty(data.InnerText) || decimal.TryParse(data.InnerText, out test2))
{
return new ColumnType(typeof(Decimal));
}
else
{
return new ColumnType(typeof(String));
}
default://"String"
return new ColumnType(typeof(String));
}
}
public static DataSet ImportExcelXML(string fileName, bool hasHeaders, bool autoDetectColumnType)
{
StreamReader sr = new StreamReader(fileName);
Stream st = (Stream)sr.BaseStream;
return ImportExcelXML(st, hasHeaders, autoDetectColumnType);
}
private static DataSet ImportExcelXML(Stream inputFileStream, bool hasHeaders, bool autoDetectColumnType)
{
XmlDocument doc = new XmlDocument();
doc.Load(new XmlTextReader(inputFileStream));
XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);
nsmgr.AddNamespace("o", "urn:schemas-microsoft-com:office:office");
nsmgr.AddNamespace("x", "urn:schemas-microsoft-com:office:excel");
nsmgr.AddNamespace("ss", "urn:schemas-microsoft-com:office:spreadsheet");
DataSet ds = new DataSet();
foreach (XmlNode node in
doc.DocumentElement.SelectNodes("//ss:Worksheet", nsmgr))
{
DataTable dt = new DataTable(node.Attributes["ss:Name"].Value);
ds.Tables.Add(dt);
XmlNodeList rows = node.SelectNodes("ss:Table/ss:Row", nsmgr);
if (rows.Count > 0)
{
//*************************
//Add Columns To Table from header row
//*************************
List<ColumnType> columns = new List<ColumnType>();
int startIndex = 0;
if (hasHeaders)
{
foreach (XmlNode data in rows[0].SelectNodes("ss:Cell/ss:Data", nsmgr))
{
columns.Add(new ColumnType(typeof(string)));//default to text
dt.Columns.Add(data.InnerText, typeof(string));
}
startIndex++;
}
//*************************
//Update Data-Types of columns if Auto-Detecting
//*************************
if (autoDetectColumnType && rows.Count > 0)
{
XmlNodeList cells = rows[startIndex].SelectNodes("ss:Cell", nsmgr);
int actualCellIndex = 0;
for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
{
XmlNode cell = cells[cellIndex];
if (cell.Attributes["ss:Index"] != null)
actualCellIndex =
int.Parse(cell.Attributes["ss:Index"].Value) - 1;
ColumnType autoDetectType =
getType(cell.SelectSingleNode("ss:Data", nsmgr));
if (actualCellIndex >= dt.Columns.Count)
{
dt.Columns.Add("Column" +
actualCellIndex.ToString(), autoDetectType.type);
columns.Add(autoDetectType);
}
else
{
dt.Columns[actualCellIndex].DataType = autoDetectType.type;
columns[actualCellIndex] = autoDetectType;
}
actualCellIndex++;
}
}
//*************************
//Load Data
//*************************
for (int i = startIndex; i < rows.Count; i++)
{
DataRow row = dt.NewRow();
XmlNodeList cells = rows[i].SelectNodes("ss:Cell", nsmgr);
int actualCellIndex = 0;
for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
{
XmlNode cell = cells[cellIndex];
if (cell.Attributes["ss:Index"] != null)
actualCellIndex = int.Parse(cell.Attributes["ss:Index"].Value) - 1;
XmlNode data = cell.SelectSingleNode("ss:Data", nsmgr);
if (actualCellIndex >= dt.Columns.Count)
{
for (int ii = dt.Columns.Count; ii < actualCellIndex; ii++)
{
dt.Columns.Add("Column" + actualCellIndex.ToString(), typeof(string)); columns.Add(getDefaultType());
} // ii
ColumnType autoDetectType =
getType(cell.SelectSingleNode("ss:Data", nsmgr));
dt.Columns.Add("Column" + actualCellIndex.ToString(),
typeof(string));
columns.Add(autoDetectType);
}
if (data != null)
row[actualCellIndex] = data.InnerText;
actualCellIndex++;
}
dt.Rows.Add(row);
}
}
}
return ds;
}
}
回答5:
Try the official API first (Microsoft Open XML SDK).
回答6:
public class ExcelXMLtoDataTable
{
private static ColumnType getDefaultType()
{
return new ColumnType(typeof(String));
}
struct ColumnType
{
public Type type;
private string name;
public ColumnType(Type type)
{
this.type = type; this.name = type.ToString().ToLower();
}
public object ParseString(string input)
{
if (String.IsNullOrEmpty(input))
return DBNull.Value;
switch (type.ToString())
{
case "system.datetime":
return DateTime.Parse(input);
case "system.decimal":
return decimal.Parse(input);
case "system.boolean":
return bool.Parse(input);
default:
return input;
}
}
}
private static ColumnType getType(XmlNode data)
{
string type = null;
//if (data == null)
// type = "";
// else
if (data.Attributes["ss:Type"] == null || data.Attributes["ss:Type"].Value == "")
type = "";
else
type = data.Attributes["ss:Type"].Value;
switch (type)
{
case "DateTime":
return new ColumnType(typeof(DateTime));
case "Boolean":
return new ColumnType(typeof(Boolean));
case "Number":
return new ColumnType(typeof(Decimal));
case "":
decimal test2;
if (data == null || String.IsNullOrEmpty(data.InnerText) || decimal.TryParse(data.InnerText, out test2))
{
return new ColumnType(typeof(Decimal));
return new ColumnType(typeof(String));
}
else
{
return new ColumnType(typeof(String));
}
default://"String"
return new ColumnType(typeof(String));
}
}
public static DataSet ImportExcelXML(string fileName, bool hasHeaders, bool autoDetectColumnType)
{
StreamReader sr = new StreamReader(fileName);
Stream st = (Stream)sr.BaseStream;
return ImportExcelXML(st, hasHeaders, autoDetectColumnType);
}
private static DataSet ImportExcelXML(Stream inputFileStream, bool hasHeaders, bool autoDetectColumnType)
{
XmlDocument doc = new XmlDocument();
doc.Load(new XmlTextReader(inputFileStream));
XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);
nsmgr.AddNamespace("o", "urn:schemas-microsoft-com:office:office");
nsmgr.AddNamespace("x", "urn:schemas-microsoft-com:office:excel");
nsmgr.AddNamespace("ss", "urn:schemas-microsoft-com:office:spreadsheet");
DataSet ds = new DataSet();
foreach (XmlNode node in
doc.DocumentElement.SelectNodes("//ss:Worksheet", nsmgr))
{
DataTable dt = new DataTable(node.Attributes["ss:Name"].Value);
ds.Tables.Add(dt);
XmlNodeList rows = node.SelectNodes("ss:Table/ss:Row", nsmgr);
if (rows.Count > 0)
{
//*************************
//Add Columns To Table from header row
//*************************
List<ColumnType> columns = new List<ColumnType>();
int startIndex = 0;
if (hasHeaders)
{
foreach (XmlNode data in rows[0].SelectNodes("ss:Cell/ss:Data", nsmgr))
{
columns.Add(new ColumnType(typeof(string)));//default to text
dt.Columns.Add(data.InnerText, typeof(string));
}
startIndex++;
}
//*************************
//Update Data-Types of columns if Auto-Detecting
//*************************
if (autoDetectColumnType && rows.Count > 0)
{
XmlNodeList cells = rows[startIndex].SelectNodes("ss:Cell", nsmgr);
int actualCellIndex = 0;
for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
{
XmlNode cell = cells[cellIndex];
if (cell.Attributes["ss:Index"] != null)
actualCellIndex =
int.Parse(cell.Attributes["ss:Index"].Value) - 1;
//ColumnType autoDetectType =
// getType(cell.SelectSingleNode("ss:Data", nsmgr));
if (actualCellIndex >= dt.Columns.Count)
{
//dt.Columns.Add("Column" +
// actualCellIndex.ToString(), autoDetectType.type);
dt.Columns.Add("Column" + actualCellIndex.ToString());
//columns.Add(autoDetectType);
}
else
{
//dt.Columns[actualCellIndex].DataType = autoDetectType.type;
//columns[actualCellIndex] = autoDetectType;
}
actualCellIndex++;
}
}
//*************************
//Load Data
//*************************
for (int i = startIndex; i < rows.Count; i++)
{
DataRow row = dt.NewRow();
XmlNodeList cells = rows[i].SelectNodes("ss:Cell", nsmgr);
int actualCellIndex = 0;
for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
{
XmlNode cell = cells[cellIndex];
if (cell.Attributes["ss:Index"] != null)
actualCellIndex = int.Parse(cell.Attributes["ss:Index"].Value) - 1;
XmlNode data = cell.SelectSingleNode("ss:Data", nsmgr);
if (actualCellIndex >= dt.Columns.Count)
{
for (int ii = dt.Columns.Count; ii < actualCellIndex; ii++)
{
//dt.Columns.Add("Column" + actualCellIndex.ToString(), typeof(string)); columns.Add(getDefaultType());
dt.Columns.Add("Column" + actualCellIndex.ToString());
//columns.Add(getDefaultType());
} // ii
//ColumnType autoDetectType = getType(cell.SelectSingleNode("ss:Data", nsmgr));
dt.Columns.Add("Column" + actualCellIndex.ToString(),
typeof(string));
// columns.Add(autoDetectType);
}
if (data != null)
row[actualCellIndex] = data.InnerText;
actualCellIndex++;
}
dt.Rows.Add(row);
}
}
}
return ds;
}
}
来源:https://stackoverflow.com/questions/12897082/how-to-read-excel-xml-c