What is the best method to replace a sequence of bytes in a binary file with other bytes of the same length? The binary files will be pretty large, about 50 MB, and should not be loaded into memory all at once.
Assuming you're trying to replace a known section of the file.
Open a FileStream with read/write access, seek to the right position, and overwrite the existing data. Sample code:
/// <summary>
/// Overwrites part of an existing file in place, starting at a given byte offset.
/// </summary>
/// <param name="filename">Path of the file to modify; it is opened with <c>FileMode.Open</c>.</param>
/// <param name="position">Byte offset at which the first byte of <paramref name="data"/> is written.</param>
/// <param name="data">Bytes written over the file content; the whole array is written.</param>
public static void ReplaceData(string filename, int position, byte[] data)
{
    Stream target = File.Open(filename, FileMode.Open);
    try
    {
        // Move the write cursor to the requested offset, then overwrite from there.
        target.Position = position;
        target.Write(data, 0, data.Length);
    }
    finally
    {
        // Always release the file handle, even if the write fails.
        target.Dispose();
    }
}
If you're effectively trying to do a binary version of a string.Replace
(e.g. "always replace bytes { 51, 20, 34 } with { 20, 35, 15 }") then it's rather harder. As a quick description of what you'd do:
stream.Position -= buffer.Length - indexWithinBuffer;
and overwrite the data. Sounds simple so far... but the tricky bit is if the data starts near the end of the buffer. You need to remember all potential matches and how far you've matched so far, so that if you get a match when you read the next buffer's-worth, you can detect it.
There are probably ways of avoiding this trickiness, but I wouldn't like to try to come up with them offhand :)
EDIT: Okay, I've got an idea which might help...
That way at some point, if the data is present, it will be completely within the buffer.
You'd need to be careful about where the stream was in order to get back to the right place, but I think this should work. It would be trickier if you were trying to find all matches, but at least the first match should be reasonably simple...
/// <summary>
/// Copies <paramref name="sourceFile"/> to <paramref name="targetFile"/>, replacing every
/// non-overlapping occurrence of <paramref name="sourceSeq"/> with <paramref name="targetSeq"/>.
/// The file is processed byte by byte, so it is never held in memory as a whole.
/// </summary>
/// <param name="sourceFile">Path of the file to read.</param>
/// <param name="sourceSeq">Byte sequence to search for; must not be null or empty.</param>
/// <param name="targetFile">Path of the file to create (an existing file is overwritten).</param>
/// <param name="targetSeq">Byte sequence written in place of each occurrence; must not be null.</param>
/// <exception cref="System.ArgumentNullException">A sequence argument is null.</exception>
/// <exception cref="System.ArgumentException"><paramref name="sourceSeq"/> is empty.</exception>
public static void BinaryReplace(string sourceFile, byte[] sourceSeq, string targetFile, byte[] targetSeq)
{
    if (sourceSeq == null)
    {
        throw new System.ArgumentNullException("sourceSeq");
    }
    if (targetSeq == null)
    {
        throw new System.ArgumentNullException("targetSeq");
    }
    if (sourceSeq.Length == 0)
    {
        throw new System.ArgumentException("The sequence to search for must not be empty.", "sourceSeq");
    }

    // Stacked usings dispose both streams even when opening the second one fails.
    using (FileStream sourceStream = File.OpenRead(sourceFile))
    using (FileStream targetStream = File.Create(targetFile))
    {
        int b;
        long matchStart = -1; // source offset of the first byte of the current partial match
        int matched = 0;      // number of leading bytes of sourceSeq matched so far

        while ((b = sourceStream.ReadByte()) != -1)
        {
            if (b == sourceSeq[matched])
            {
                if (matched == 0)
                {
                    // Remember where this candidate started so we can rewind on failure.
                    matchStart = sourceStream.Position - 1;
                }

                ++matched;
                if (matched == sourceSeq.Length)
                {
                    // Complete occurrence: emit the replacement instead of the matched bytes.
                    targetStream.Write(targetSeq, 0, targetSeq.Length);
                    matched = 0;
                    matchStart = -1;
                }
            }
            else if (matched == 0)
            {
                // Not inside a candidate: copy the byte through unchanged.
                targetStream.WriteByte((byte)b);
            }
            else
            {
                // The candidate failed. Its first byte cannot start a match, so emit it and
                // re-scan from the byte right after it to catch overlapping candidates.
                targetStream.WriteByte(sourceSeq[0]);
                sourceStream.Position = matchStart + 1;
                matched = 0;
                matchStart = -1;
            }
        }

        // The input ended in the middle of a candidate: those bytes were held back and
        // must still be copied, otherwise the tail of the file would be lost.
        if (matched > 0)
        {
            targetStream.Write(sourceSeq, 0, matched);
        }
    }
}
My solution :
/// <summary>
/// Copies a file to another file, replacing every non-overlapping occurrence of a search
/// term with a replacement term. The terms are encoded as UTF-8; a position matches when
/// the byte equals either the upper-cased or the lower-cased encoding of the search term,
/// which makes the search case-insensitive. The input is streamed, not loaded into memory.
/// </summary>
/// <param name="originalFile">Path of the file to read.</param>
/// <param name="outputFile">Path of the file to create; an existing file is truncated.</param>
/// <param name="searchTerm">Text to search for; must not be null or empty.</param>
/// <param name="replaceTerm">Text written in place of each occurrence; must not be null.</param>
/// <exception cref="ArgumentException">
/// <paramref name="searchTerm"/> is null or empty, or its upper- and lower-case UTF-8
/// encodings have different lengths (byte-wise comparison is then impossible).
/// </exception>
/// <exception cref="ArgumentNullException"><paramref name="replaceTerm"/> is null.</exception>
private static void ReplaceTextInBinaryFile(string originalFile, string outputFile, string searchTerm, string replaceTerm)
{
    if (string.IsNullOrEmpty(searchTerm))
    {
        throw new ArgumentException("The search term must not be null or empty.", "searchTerm");
    }
    if (replaceTerm == null)
    {
        throw new ArgumentNullException("replaceTerm");
    }

    // Invariant casing keeps the result independent of the current thread culture.
    byte[] searchUpper = Encoding.UTF8.GetBytes(searchTerm.ToUpperInvariant());
    byte[] searchLower = Encoding.UTF8.GetBytes(searchTerm.ToLowerInvariant());
    if (searchUpper.Length != searchLower.Length)
    {
        throw new ArgumentException(
            "The upper- and lower-case encodings of the search term differ in length.", "searchTerm");
    }

    byte[] replaceBytes = Encoding.UTF8.GetBytes(replaceTerm);
    int termLength = searchUpper.Length;

    // Sliding window holding the most recent bytes that have not been written yet.
    byte[] window = new byte[termLength];
    int filled = 0;
    int counter = 0;

    using (FileStream inputStream = File.OpenRead(originalFile))
    // File.Create truncates, so no stale bytes survive from a previous, longer output file.
    using (FileStream outputStream = File.Create(outputFile))
    {
        int next;
        while ((next = inputStream.ReadByte()) != -1)
        {
            window[filled++] = (byte)next;
            if (filled < termLength)
            {
                continue; // not enough bytes buffered to compare yet
            }

            bool isMatch = true;
            for (int i = 0; i < termLength; ++i)
            {
                if (window[i] != searchUpper[i] && window[i] != searchLower[i])
                {
                    isMatch = false;
                    break;
                }
            }

            if (isMatch)
            {
                ++counter;
                outputStream.Write(replaceBytes, 0, replaceBytes.Length);
                filled = 0;
            }
            else
            {
                // Only the oldest byte is known not to start a match; emit it and slide
                // the window by one so overlapping candidates are still examined.
                outputStream.WriteByte(window[0]);
                Array.Copy(window, 1, window, 0, termLength - 1);
                filled = termLength - 1;
            }
        }

        // Bytes still buffered at end of input cannot form a full match; copy them as-is.
        outputStream.Write(window, 0, filled);
    }

    Console.WriteLine("ReplaceTextInBinaryFile.counter = " + counter);
}
You can use my BinaryUtility to search and replace one or more bytes without loading the entire file into memory like this:
// Each pair is (bytes to look for, bytes to write instead).
var searchAndReplace = new List<Tuple<byte[], byte[]>>();

// Four-byte pattern replaced by another four-byte value.
searchAndReplace.Add(Tuple.Create(
    BitConverter.GetBytes((UInt32)0xDEADBEEF),
    BitConverter.GetBytes((UInt32)0x01234567)));

// Four-byte pattern replaced by a two-byte value, so the result can be shorter than the input.
searchAndReplace.Add(Tuple.Create(
    BitConverter.GetBytes((UInt32)0xAABBCCDD),
    BitConverter.GetBytes((UInt16)0xAFFE)));

// Stream from the input file to the output file; both are closed when the block ends.
using (var reader = new BinaryReader(new FileStream(@"C:\temp\data.bin", FileMode.Open)))
using (var writer = new BinaryWriter(new FileStream(@"C:\temp\result.bin", FileMode.Create)))
{
    BinaryUtility.Replace(reader, writer, searchAndReplace);
}
BinaryUtility code:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
/// <summary>
/// Streaming search-and-replace helpers for binary data. All operations work on lazily
/// enumerated byte sequences, so the data never has to be held in memory as a whole.
/// </summary>
public static class BinaryUtility
{
    /// <summary>
    /// Lazily yields the bytes available from <paramref name="reader"/>, reading them in
    /// chunks of 1024 bytes until a chunk comes back shorter than requested.
    /// </summary>
    /// <param name="reader">Reader to pull bytes from.</param>
    /// <returns>A lazily evaluated sequence of the bytes read.</returns>
    public static IEnumerable<byte> GetByteStream(BinaryReader reader)
    {
        const int bufferSize = 1024;
        byte[] buffer;
        do
        {
            buffer = reader.ReadBytes(bufferSize);
            foreach (var d in buffer) { yield return d; }
        } while (bufferSize == buffer.Length); // a short chunk ends the loop
    }

    /// <summary>
    /// Copies all bytes from <paramref name="reader"/> to <paramref name="writer"/>,
    /// applying every search/replace pair along the way.
    /// </summary>
    /// <param name="reader">Source of the bytes.</param>
    /// <param name="writer">Destination of the transformed bytes.</param>
    /// <param name="searchAndReplace">Pairs of (bytes to find, bytes to write instead).</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public static void Replace(BinaryReader reader, BinaryWriter writer, IEnumerable<Tuple<byte[], byte[]>> searchAndReplace)
    {
        if (reader == null) { throw new ArgumentNullException("reader"); }
        if (writer == null) { throw new ArgumentNullException("writer"); }

        foreach (byte d in Replace(GetByteStream(reader), searchAndReplace)) { writer.Write(d); }
    }

    /// <summary>
    /// Applies the search/replace pairs one after another: each pair operates on the output
    /// of the previous one, so bytes produced by an earlier replacement can be matched by a
    /// later pair.
    /// </summary>
    /// <param name="source">Bytes to transform.</param>
    /// <param name="searchAndReplace">Pairs of (bytes to find, bytes to write instead).</param>
    /// <returns>A lazily evaluated sequence of the transformed bytes.</returns>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public static IEnumerable<byte> Replace(IEnumerable<byte> source, IEnumerable<Tuple<byte[], byte[]>> searchAndReplace)
    {
        if (source == null) { throw new ArgumentNullException("source"); }
        if (searchAndReplace == null) { throw new ArgumentNullException("searchAndReplace"); }

        foreach (var s in searchAndReplace)
        {
            source = Replace(source, s.Item1, s.Item2);
        }
        return source;
    }

    /// <summary>
    /// Replaces every non-overlapping occurrence of <paramref name="from"/> in
    /// <paramref name="input"/> with <paramref name="to"/>, scanning left to right.
    /// </summary>
    /// <param name="input">Bytes to transform; enumerated lazily and only once.</param>
    /// <param name="from">Pattern to search for; must contain at least one byte.</param>
    /// <param name="to">Bytes emitted in place of each occurrence; may be empty.</param>
    /// <returns>A lazily evaluated sequence of the transformed bytes.</returns>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="from"/> is empty.</exception>
    public static IEnumerable<byte> Replace(IEnumerable<byte> input, IEnumerable<byte> from, IEnumerable<byte> to)
    {
        if (input == null) { throw new ArgumentNullException("input"); }
        if (from == null) { throw new ArgumentNullException("from"); }
        if (to == null) { throw new ArgumentNullException("to"); }

        // Materialize once: the pattern is indexed randomly and the replacement is emitted
        // repeatedly, and neither needs IEnumerator.Reset (unsupported by iterator blocks).
        byte[] pattern = from.ToArray();
        byte[] replacement = to.ToArray();
        if (pattern.Length == 0)
        {
            throw new ArgumentException("The pattern to search for must not be empty.", "from");
        }

        return ReplaceIterator(input, pattern, replacement);
    }

    // Streams the input through a Knuth-Morris-Pratt matcher, holding back only the bytes
    // of the current partial match.
    private static IEnumerable<byte> ReplaceIterator(IEnumerable<byte> input, byte[] pattern, byte[] replacement)
    {
        int[] failure = BuildFailureTable(pattern);
        int matched = 0; // held-back bytes; they always equal pattern[0..matched)

        foreach (byte data in input)
        {
            // On mismatch fall back to the longest shorter prefix that is still a suffix of
            // the held-back bytes, releasing the bytes that can no longer start a match.
            while (matched > 0 && data != pattern[matched])
            {
                int fallback = failure[matched - 1];
                for (int i = 0; i < matched - fallback; i++) { yield return pattern[i]; }
                matched = fallback;
            }

            if (data == pattern[matched])
            {
                matched++;
                if (matched == pattern.Length)
                {
                    foreach (byte d in replacement) { yield return d; }
                    matched = 0;
                }
            }
            else
            {
                yield return data;
            }
        }

        // Input ended inside a partial match: release the held-back bytes unchanged.
        for (int i = 0; i < matched; i++) { yield return pattern[i]; }
    }

    // For each prefix length, computes the length of its longest proper prefix that is
    // also a suffix (the KMP failure function).
    private static int[] BuildFailureTable(byte[] pattern)
    {
        var table = new int[pattern.Length];
        int k = 0;
        for (int i = 1; i < pattern.Length; i++)
        {
            while (k > 0 && pattern[i] != pattern[k]) { k = table[k - 1]; }
            if (pattern[i] == pattern[k]) { k++; }
            table[i] = k;
        }
        return table;
    }
}