需求
把不同客户提供Excel 直接导入到系统中生成对应的收货单或是出货单。后端创建收货端和出货单的接口已经有现成的webservice或是标准的xml;这类需要做的就是把客户提供不同种类的Excel mapping成标准格式。
要重点解决问题
不同格式的excel如何找到对应的数据项,比如一个Excel中需要字段分别在不同的sheet或是不同的位置上。
解决方案
第一个定位配置信息,sheet-name:数据从哪个sheet中读取,默认sheet1,start-tag:固定标识,查找Excel中一些特殊文本信息来定位具体的celladdress(行,列),data-offset:设置一个偏移量,在找到具体内容的地址后可能真正需要数据在后面,那就需要设置一个偏移量待读取信息, end-tag:结束位置,用于循环读取的范围。
第二个映射字段明,XmlNode Name:就是目标字段明,data-field:Excel中对应的字段名称(含有表头的行),data-type:目标字段的类型,data-formatter:格式化截取excel Cell中的内容 比如:需要通过substring,或splitl来取其中的内容。
第三个表示单个表头,还是循环的表体 replicate="true" 表示需要循环读取。
大致的处理过程
先根据配置规则把Excel中需要的信息提取出来并生成一个XML文档,如果标准的接口XML结构和数据都比较复杂,那么还需要使用XSLT语言来做更复杂的mapping,满足后端服务的要求。
实际运行的过程
原始Excel
配置规则XML
转换成初步XML
实现的代码
代码非常的简单,还是第一个版本,以后再慢慢优化和重构
1 class Program
2 {
3 static async Task Main(string[] args)
4 {
5 var path = @"d:\9C箱单0000880680.xlsx";
6 var configpath = @"d:\XslImportRule1.xml";
7 var xdoc = XDocument.Load(configpath);
8 var root = xdoc.Root.Name;
9 var descxml = new XDocument();
10 descxml.Add(new XElement(xdoc.Root.Name));
11 var workbook = new XSSFWorkbook(path);
12 Process(workbook,null, xdoc.Root, 0, descxml.Root,null);
13 descxml.Save("d:\\output.xml");
14 return;
15 }
16 static void Process(IWorkbook book, ISheet sheet, XElement element, int depth, XElement root,DataRow dr)
17 {
18 var pelment = element.Parent;
19 var name = element.Name;
20 var atts = element.Attributes();
21 var replicate = atts.Where(x => x.Name == "replicate").FirstOrDefault()?.Value;
22 var sheetname = atts.Where(x => x.Name == "sheet-name").FirstOrDefault()?.Value;
23 var starttag = atts.Where(x => x.Name == "start-tag").FirstOrDefault()?.Value;
24 var start = atts.Where(x => x.Name == "start").FirstOrDefault()?.Value;
25 var endtag = atts.Where(x => x.Name == "end-tag").FirstOrDefault()?.Value;
26 var end = atts.Where(x => x.Name == "end").FirstOrDefault()?.Value;
27 var fieldname = atts.Where(x => x.Name == "data-field").Select(x => x.Value).FirstOrDefault();
28 var datatype = atts.Where(x => x.Name == "data-type").Select(x => x.Value);
29 var defaultvalue = atts.Where(x => x.Name == "data-default").FirstOrDefault()?.Value;
30 var formatter = atts.Where(x => x.Name == "data-formatter").FirstOrDefault()?.Value;
31 var offset = atts.Where(x => x.Name == "data-offset").FirstOrDefault()?.Value;
32 XElement copyelement = null;
33
34 //if (element.Parent != null )
35 //{
36 // copyelement = new XElement(name);
37 // root.Add(copyelement);
38 //}
39 if (!string.IsNullOrEmpty(replicate) && !string.IsNullOrEmpty(sheetname)) {
40 sheet = book.GetSheet(sheetname);
41 }
42
43 if (!element.HasElements)
44 {
45 copyelement = new XElement(name);
46 root.Add(copyelement);
47 // element is child with no descendants
48 if (dr == null)
49 {
50 CellAddress celladdress = null;
51 if (!string.IsNullOrEmpty(starttag))
52 {
53 celladdress = findXslx(sheet, starttag);
54 }
55 else if (!string.IsNullOrEmpty(start))
56 {
57 celladdress = new CellAddress(new CellReference(start));
58 }
59 if (celladdress != null)
60 {
61 var r = 0;
62 var c = 0;
63 if (!string.IsNullOrEmpty(offset))
64 {
65 var sp = offset.Split(';');
66 foreach (var ts in sp)
67 {
68 var sparray = ts.Split(':');
69 if (sparray[0].Equals("c", StringComparison.OrdinalIgnoreCase))
70 {
71 c = Convert.ToInt32(sparray[1]);
72 }
73 else
74 {
75 r = Convert.ToInt32(sparray[1]);
76 }
77 }
78 }
79 var cell = sheet.GetRow(celladdress.Row + r).GetCell(celladdress.Column + c);
80 var val = getCellValue(cell);
81 if (string.IsNullOrEmpty(val) && !string.IsNullOrEmpty(defaultvalue))
82 {
83 val = defaultvalue;
84 }
85 if (!string.IsNullOrEmpty(val) && !string.IsNullOrEmpty(formatter))
86 {
87 var codescript = formatter.Replace("$", "\"" + val + "\"");
88 var fval = CSharpScript.EvaluateAsync<string>(codescript).Result;
89 val = fval;
90 }
91 copyelement.SetValue(val);
92 }
93 else if (!string.IsNullOrEmpty(defaultvalue))
94 {
95 copyelement.SetValue(defaultvalue);
96 }
97 }
98 else
99 {
100 if(dr.Table.Columns.Contains(fieldname))
101 {
102 var val = dr[fieldname].ToString();
103 if (string.IsNullOrEmpty(val) && !string.IsNullOrEmpty(defaultvalue))
104 {
105 val = defaultvalue;
106
107 }
108 copyelement.SetValue(val);
109 }
110 else if(!string.IsNullOrEmpty(defaultvalue))
111 {
112 copyelement.SetValue(defaultvalue);
113 }
114 }
115
116 }
117 else
118 {
119 depth++;
120 if (replicate == "true")
121 {
122 var datatable= filldatatable(sheet, starttag, start, endtag, end, offset);
123 if (datatable.Rows.Count > 0)
124 {
125 foreach (DataRow datarow in datatable.Rows)
126 {
127 copyelement = new XElement(name);
128 foreach (var child in element.Elements())
129 {
130 if (copyelement != null)
131 {
132 Process(book, sheet, child, depth, copyelement, datarow);
133 }
134 else
135 {
136 Process(book, sheet, child, depth, root, datarow);
137 }
138
139 }
140 root.Add(copyelement);
141 }
142 }
143 }
144 else
145 {
146 if (element.Parent != null)
147 {
148 copyelement = new XElement(name);
149 root.Add(copyelement);
150 }
151 foreach (var child in element.Elements())
152 {
153 if (copyelement != null)
154 {
155 Process(book,sheet, child, depth, copyelement,null);
156 }
157 else
158 {
159 Process(book,sheet, child, depth, root,null);
160 }
161
162 }
163 }
164
165 depth--;
166 }
167 }
168
169 private static DataTable filldatatable(ISheet sheet, string starttag, string start, string endtag, string end, string offset)
170 {
171 CellAddress startaddress = null;
172 CellAddress endaddress = null;
173 if (!string.IsNullOrEmpty(starttag))
174 {
175 startaddress = findXslx(sheet, starttag);
176 }
177 else if (!string.IsNullOrEmpty(start))
178 {
179 startaddress = new CellAddress(new CellReference(start));
180 }
181 else
182 {
183 startaddress = new CellAddress(new CellReference("A0"));
184 }
185 if (!string.IsNullOrEmpty(endtag))
186 {
187 endaddress = findXslx(sheet, endtag);
188 }
189 else if (!string.IsNullOrEmpty(end))
190 {
191 endaddress = new CellAddress(new CellReference(end));
192 }
193 else
194 {
195 endaddress = null;
196 }
197 var offsetr = 0;
198 var offsetc = 0;
199 if (!string.IsNullOrEmpty(offset))
200 {
201 var sp = offset.Split(';');
202 foreach (var ts in sp)
203 {
204 var sparray = ts.Split(':');
205 if (sparray[0].Equals("c", StringComparison.OrdinalIgnoreCase))
206 {
207 offsetc = Convert.ToInt32(sparray[1]);
208 }
209 else
210 {
211 offsetr = Convert.ToInt32(sparray[1]);
212 }
213 }
214 }
215 var firstrow = startaddress == null ? sheet.FirstRowNum : startaddress.Row + offsetr;
216 var lastrow = (endaddress == null) ? sheet.LastRowNum : endaddress.Row;
217 var table = new DataTable();
218 var lastcell = 0; //row.LastCellNum;
219 var firstcell = 0; //row.FirstCellNum + offsetc;
220 for (int r = firstrow; r < lastrow; r++)
221 {
222 var row = sheet.GetRow(r);
223 if (row == null) continue;
224
225 if (r == firstrow)
226 {
227 lastcell = row.LastCellNum;
228 firstcell = row.FirstCellNum + offsetc;
229 for (int c = firstcell; c < lastcell; c++)
230 {
231 var cell = row.GetCell(c);
232 if (cell == null) continue;
233 var strval = getCellValue(cell).Trim();
234 if (!string.IsNullOrEmpty(strval))
235 {
236 table.Columns.Add(new DataColumn(strval));
237 }
238 }
239 }
240 else
241 {
242 var dataRow = table.NewRow();
243 var array = new string[table.Columns.Count];
244 //for (var c = 0; c < table.Columns.Count; c++)
245 //{
246 // var cell = row.GetCell(firstcell+c);
247 // var val = getCellValue(cell).Trim();
248 // array[c] = val;
249 //}
250 for (int c = firstcell; c < lastcell; c++)
251 {
252 var cell = row.GetCell(c);
253 var val = getCellValue(cell).Trim();
254 array[c- firstcell] = val;
255 }
256 dataRow.ItemArray = array;
257 table.Rows.Add(dataRow);
258 }
259 }
260 return table;
261 }
262
263 private static CellAddress findXslx(ISheet sheet, string key)
264 {
265 var lastrow = sheet.LastRowNum;
266 var firstrow = sheet.FirstRowNum;
267 for (int r = firstrow; r < lastrow; r++)
268 {
269 var row = sheet.GetRow(r);
270 if (row == null) continue;
271 var lastcell = row.LastCellNum;
272 var firstcell = row.FirstCellNum;
273 for (int c = firstcell; c < lastcell; c++)
274 {
275 var cell = row.GetCell(c);
276 if (cell == null) continue;
277 var strval = getCellValue(cell).Trim();
278 //if (strval.Trim().Equals(key, StringComparison.OrdinalIgnoreCase))
279 //{
280 // return cell.Address;
281 //}
282 if (match(key, strval))
283 {
284 return cell.Address;
285 }
286 }
287 }
288 return null;
289 }
290 private static string getCellValue(ICell cell)
291 {
292 if (cell == null)
293 {
294 return string.Empty;
295 }
296 var dataFormatter = new DataFormatter(CultureInfo.CurrentCulture);
297
298 // If this is not part of a merge cell,
299 // just get this cell's value like normal.
300 if (!cell.IsMergedCell)
301 {
302 return dataFormatter.FormatCellValue(cell);
303 }
304
305 // Otherwise, we need to find the value of this merged cell.
306 else
307 {
308 // Get current sheet.
309 var currentSheet = cell.Sheet;
310
311 // Loop through all merge regions in this sheet.
312 for (int i = 0; i < currentSheet.NumMergedRegions; i++)
313 {
314 var mergeRegion = currentSheet.GetMergedRegion(i);
315
316 // If this merged region contains this cell.
317 if (mergeRegion.FirstRow <= cell.RowIndex && cell.RowIndex <= mergeRegion.LastRow &&
318 mergeRegion.FirstColumn <= cell.ColumnIndex && cell.ColumnIndex <= mergeRegion.LastColumn)
319 {
320 // Find the top-most and left-most cell in this region.
321 var firstRegionCell = currentSheet.GetRow(mergeRegion.FirstRow)
322 .GetCell(mergeRegion.FirstColumn);
323
324 // And return its value.
325 return dataFormatter.FormatCellValue(firstRegionCell);
326 }
327 }
328 // This should never happen.
329 throw new Exception("Cannot find this cell in any merged region");
330 }
331 }
332
333 static bool match(string pattern, string input)
334 {
335 if (String.Compare(pattern, input) == 0)
336 {
337 return true;
338 }
339 else if (String.IsNullOrEmpty(input))
340 {
341 if (String.IsNullOrEmpty(pattern.Trim(new Char[1] { '*' })))
342 {
343 return true;
344 }
345 else
346 {
347 return false;
348 }
349 }
350 else if (pattern.Length == 0)
351 {
352 return false;
353 }
354 else if (pattern[0] == '?')
355 {
356 return match(pattern.Substring(1), input.Substring(1));
357 }
358 else if (pattern[pattern.Length - 1] == '?')
359 {
360 return match(pattern.Substring(0, pattern.Length - 1),
361 input.Substring(0, input.Length - 1));
362 }
363 else if (pattern[0] == '*')
364 {
365 if (match(pattern.Substring(1), input))
366 {
367 return true;
368 }
369 else
370 {
371 return match(pattern, input.Substring(1));
372 }
373 }
374 else if (pattern[pattern.Length - 1] == '*')
375 {
376 if (match(pattern.Substring(0, pattern.Length - 1), input))
377 {
378 return true;
379 }
380 else
381 {
382 return match(pattern, input.Substring(0, input.Length - 1));
383 }
384 }
385 else if (pattern[0] == input[0])
386 {
387 return match(pattern.Substring(1), input.Substring(1));
388 }
389 return false;
390 }
391 }
392 }
代码库
https://github.com/neozhu/excelcompleximport
最近还会继续更新
来源:oschina
链接:https://my.oschina.net/u/4305379/blog/3274595