问题
I'm trying to read some files with ReadLine
, but my file have some break lines that I need to catch (not all of them), and I don't know how to get them in the same array, neither in any other array with these separators... because... ReadLine
reads lines, and break these lines, huh?
I can't replace these because I need to check it after the process, so I need to get the breaklines AND the content after that. That's the problem. How can I do that?
Here's my code:
public class ReadFile
{
string extension;
string filename;
System.IO.StreamReader sr;
public ReadFile(string arquivo, System.IO.StreamReader sr)
{
string ext = Path.GetExtension(arquivo);
sr = new StreamReader(arquivo, System.Text.Encoding.Default);
this.sr = sr;
this.extension = ext;
this.filename = Path.GetFileNameWithoutExtension(arquivo);
if (ext.Equals(".EXP", StringComparison.OrdinalIgnoreCase))
{
ReadEXP(arquivo);
}
else MessageBox.Show("Extensão de arquivo não suportada: "+ext);
}
public void ReadEXP(string arquivo)
{
string line = sr.ReadLine();
string[] words;
string[] Separators = new string[] { "<Segment>", "</Segment>", "<Source>", "</Source>", "<Target>", "</Target>" };
string ID = null;
string Source = null;
string Target = null;
DataBase db = new DataBase();
//db.CreateTable_EXP(filename);
db.CreateTable_EXP();
while ((line = sr.ReadLine()) != null)
{
try
{
if (line.Contains("<Segment>"))
{
ID = "";
words = line.Split(Separators, StringSplitOptions.None);
ID = words[0];
for (int i = 1; i < words.Length; i++ )
ID += words[i];
MessageBox.Show("Segment[" + words.Length + "]: " + ID);
}
if (line.Contains("<Source>"))
{
Source = "";
words = line.Split(Separators, StringSplitOptions.None);
Source = words[0];
for (int i = 1; i < words.Length; i++)
Source += words[i];
MessageBox.Show("Source[" + words.Length + "]: " + Source);
}
if (line.Contains("<Target>"))
{
Target = "";
words = line.Split(Separators, StringSplitOptions.None);
Target = words[0];
for (int i = 1; i < words.Length; i++)
Target += words[i];
MessageBox.Show("Target[" + words.Length + "]: " + Target);
db.PopulateTable_EXP(ID, Source, Target);
MessageBox.Show("ID: " + ID + "\nSource: " + Source + "\nTarget: " + Target);
}
}
catch (IndexOutOfRangeException e)
{
MessageBox.Show(e.Message.ToString());
MessageBox.Show("ID: " + ID + "\nSource: " + Source + "\nTarget: " + Target);
}
}
return;
}
回答1:
If you are trying to read XML, try using the built in libaries, here is a simple example of loading a section of XML with <TopLevelTag>
in it.
var xmlData = XDocument.Load(@"C:\folder\file.xml").Element("TopLevelTag");
if (xmlData == null) throw new Exception("Failed To Load XML");
Here is a tidy way to get content without it throwing an exception if missing from the XML.
var xmlBit = (string)xmlData.Element("SomeSubTag") ?? "";
If you really have to roll your own, then look at examples for CSV parsers, where ReadBlock can be used to get the raw data including line breaks.
private char[] chunkBuffer = new char[4096];
var fileStream = new System.IO.StreamReader(new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite));
var chunkLength = fileStream.ReadBlock(chunkBuffer, 0, chunkBuffer.Length);
来源:https://stackoverflow.com/questions/21465568/alternative-to-readline