Introduction
I just wanted to put out two ways to import data from Excel into a DataSet.
- Preferred - Importing from an XLS file using the OLE Jet engine (simple).
- UPDATED: I'm now setting the IMEX flag to 0. This may cause worse performance. Check out
http://www.connectionstrings.com/excel for more information.
- UPDATED: I switched it to use ACE driver if it has an XLSX extension. You will need to ensure ACE is installed on the server you deploy to.
- http://www.microsoft.com/en-us/download/details.aspx?id=13255
- UPDATED: Please refer to these links for good information about connecting to xls and xlsx files.
- http://www.connectionstrings.com/excel
- http://www.connectionstrings.com/excel-2007
- Importing from an Excel XML file. (The XML format that Excel uses, not just any XML file.)
- Note: This is a long-winded custom solution. Should work, but might require tweaks.
- This works well if you're sure the data will be valid, or if you don't require it to do type-detection (flag for this on procedure).
- Exporting to an Excel XML file can be found here.
Using the code
Download the file for specifics, but here's a summary:
XLS Import
public static DataSet ImportExcelXLS(string FileName, bool hasHeaders) {
string HDR = hasHeaders ? "Yes" : "No";
string strConn;
if (FileName.Substring(FileName.LastIndexOf('.')).ToLower() == ".xlsx")
strConn = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + FileName + ";Extended Properties=\"Excel 12.0;HDR=" + HDR + ";IMEX=0\"";
else
strConn = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + FileName + ";Extended Properties=\"Excel 8.0;HDR=" + HDR + ";IMEX=0\"";
DataSet output = new DataSet();
using (OleDbConnection conn = new OleDbConnection(strConn)) {
conn.Open();
DataTable schemaTable = conn.GetOleDbSchemaTable(
OleDbSchemaGuid.Tables, new object[] { null, null, null, "TABLE" });
foreach (DataRow schemaRow in schemaTable.Rows) {
string sheet = schemaRow["TABLE_NAME"].ToString();
if (!sheet.EndsWith("_")) {
try {
OleDbCommand cmd = new OleDbCommand("SELECT * FROM [" + sheet + "]", conn);
cmd.CommandType = CommandType.Text;
DataTable outputTable = new DataTable(sheet);
output.Tables.Add(outputTable);
new OleDbDataAdapter(cmd).Fill(outputTable);
} catch (Exception ex) {
throw new Exception(ex.Message + string.Format("Sheet:{0}.File:F{1}", sheet, FileName), ex);
}
}
}
}
return output;
}
Excel XML Import (Summary)
public static DataSet ImportExcelXML(Stream inputFileStream,
bool hasHeaders, bool autoDetectColumnType) {
XmlDocument doc = new XmlDocument();
doc.Load(new XmlTextReader(inputFileStream));
XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);
nsmgr.AddNamespace("o", "urn:schemas-microsoft-com:office:office");
nsmgr.AddNamespace("x", "urn:schemas-microsoft-com:office:excel");
nsmgr.AddNamespace("ss", "urn:schemas-microsoft-com:office:spreadsheet");
DataSet ds = new DataSet();
foreach (XmlNode node in
doc.DocumentElement.SelectNodes("//ss:Worksheet", nsmgr)) {
DataTable dt = new DataTable(node.Attributes["ss:Name"].Value);
ds.Tables.Add(dt);
XmlNodeList rows = node.SelectNodes("ss:Table/ss:Row", nsmgr);
if (rows.Count > 0) {
List<ColumnType> columns = new List<ColumnType>();
int startIndex = 0;
if (hasHeaders) {
foreach (XmlNode data in rows[0].SelectNodes("ss:Cell/ss:Data", nsmgr)) {
columns.Add(new ColumnType(typeof(string)));
dt.Columns.Add(data.InnerText, typeof(string));
}
startIndex++;
}
if (autoDetectColumnType && rows.Count > 0) {
XmlNodeList cells = rows[startIndex].SelectNodes("ss:Cell", nsmgr);
int actualCellIndex = 0;
for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++) {
XmlNode cell = cells[cellIndex];
if (cell.Attributes["ss:Index"] != null)
actualCellIndex =
int.Parse(cell.Attributes["ss:Index"].Value) - 1;
ColumnType autoDetectType =
getType(cell.SelectSingleNode("ss:Data", nsmgr));
if (actualCellIndex >= dt.Columns.Count) {
dt.Columns.Add("Column" +
actualCellIndex.ToString(), autoDetectType.type);
columns.Add(autoDetectType);
} else {
dt.Columns[actualCellIndex].DataType = autoDetectType.type;
columns[actualCellIndex] = autoDetectType;
}
actualCellIndex++;
}
}
for (int i = startIndex; i < rows.Count; i++) {
DataRow row = dt.NewRow();
XmlNodeList cells = rows[i].SelectNodes("ss:Cell", nsmgr);
int actualCellIndex = 0;
for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++) {
XmlNode cell = cells[cellIndex];
if (cell.Attributes["ss:Index"] != null)
actualCellIndex = int.Parse(cell.Attributes["ss:Index"].Value) - 1;
XmlNode data = cell.SelectSingleNode("ss:Data", nsmgr);
if (actualCellIndex >= dt.Columns.Count) {
for (int i = dt.Columns.Count; i < actualCellIndex; i++) {
dt.Columns.Add("Column" +
actualCellIndex.ToString(), typeof(string));
columns.Add(getDefaultType());
}
ColumnType autoDetectType =
getType(cell.SelectSingleNode("ss:Data", nsmgr));
dt.Columns.Add("Column" + actualCellIndex.ToString(),
typeof(string));
columns.Add(autoDetectType);
}
if (data != null)
row[actualCellIndex] = data.InnerText;
actualCellIndex++;
}
dt.Rows.Add(row);
}
}
}
return ds;
}
?>
//<?mso-application progid="Excel.Sheet"?>
//<Workbook>
// <Worksheet ss:Name="Sheet1">
// <Table>
// <Row>
// <Cell><Data ss:Type="String">Item Number</Data></Cell>
// <Cell><Data ss:Type="String">Description</Data></Cell>
// <Cell ss:StyleID="s21"><Data ss:Type="String">Item Barcode</Data></Cell>
// </Row>
// </Worksheet>
//</Workbook>