What's the best way to read a file backwards in C? I know at first you may be thinking that this is no use whatsoever, but most logs etc. append the most recent data at the end of the file. I want to read in text from the file backwards, buffering it into lines - that is
abc
def
ghi
should read ghi, def, abc in lines.
So far I have tried:
#include <stdio.h>
#include <stdlib.h>
void read_file(FILE *fileptr)
{
char currentchar = '\0';
int size = 0;
while( currentchar != '\n' )
{
currentchar = fgetc(fileptr); printf("%c\n", currentchar);
fseek(fileptr, -2, SEEK_CUR);
if( currentchar == '\n') { fseek(fileptr, -2, SEEK_CUR); break; }
else size++;
}
char buffer[size]; fread(buffer, 1, size, fileptr);
printf("Length: %d chars\n", size);
printf("Buffer: %s\n", buffer);
}
int main(int argc, char *argv[])
{
if( argc < 2) { printf("Usage: backwards [filename]\n"); return 1; }
FILE *fileptr = fopen(argv[1], "rb");
if( fileptr == NULL ) { perror("Error:"); return 1; }
fseek(fileptr, -1, SEEK_END); /* Seek to END of the file just before EOF */
read_file(fileptr);
return 0;
}
In an attempt to simply read one line and buffer it. Sorry that my code is terrible, I am getting so very confused. I know that you would normally allocate memory for the whole file and then read in the data, but for large files that constantly change I thought it would be better to read directly (especially if I want to search for text in a file).
Thanks in advance
* Sorry forgot to mention this will be used on Linux, so newlines are just NL without CR. *
I recommend a more portable (hopefully) way of file size determination since fseek(binaryStream, offset, SEEK_END)
is not guaranteed to work. See the code below.
I believe that files should be at least minimally buffered at the kernel level (e.g. buffering at least one block per file by default), so seeks should not incur significant amount of extra I/O and should only advance the file position internally. If the default buffering is not satisfactory, you may try to use setvbuf()
to speed up the I/O.
#include <limits.h>
#include <string.h>
#include <stdio.h>
/* File must be open with 'b' in the mode parameter to fopen() */
long fsize(FILE* binaryStream)
{
long ofs, ofs2;
int result;
if (fseek(binaryStream, 0, SEEK_SET) != 0 ||
fgetc(binaryStream) == EOF)
return 0;
ofs = 1;
while ((result = fseek(binaryStream, ofs, SEEK_SET)) == 0 &&
(result = (fgetc(binaryStream) == EOF)) == 0 &&
ofs <= LONG_MAX / 4 + 1)
ofs *= 2;
/* If the last seek failed, back up to the last successfully seekable offset */
if (result != 0)
ofs /= 2;
for (ofs2 = ofs / 2; ofs2 != 0; ofs2 /= 2)
if (fseek(binaryStream, ofs + ofs2, SEEK_SET) == 0 &&
fgetc(binaryStream) != EOF)
ofs += ofs2;
/* Return -1 for files longer than LONG_MAX */
if (ofs == LONG_MAX)
return -1;
return ofs + 1;
}
/* File must be open with 'b' in the mode parameter to fopen() */
/* Set file position to size of file before reading last line of file */
char* fgetsr(char* buf, int n, FILE* binaryStream)
{
long fpos;
int cpos;
int first = 1;
if (n <= 1 || (fpos = ftell(binaryStream)) == -1 || fpos == 0)
return NULL;
cpos = n - 1;
buf[cpos] = '\0';
for (;;)
{
int c;
if (fseek(binaryStream, --fpos, SEEK_SET) != 0 ||
(c = fgetc(binaryStream)) == EOF)
return NULL;
if (c == '\n' && first == 0) /* accept at most one '\n' */
break;
first = 0;
if (c != '\r') /* ignore DOS/Windows '\r' */
{
unsigned char ch = c;
if (cpos == 0)
{
memmove(buf + 1, buf, n - 2);
++cpos;
}
memcpy(buf + --cpos, &ch, 1);
}
if (fpos == 0)
{
fseek(binaryStream, 0, SEEK_SET);
break;
}
}
memmove(buf, buf + cpos, n - cpos);
return buf;
}
int main(int argc, char* argv[])
{
FILE* f;
long sz;
if (argc < 2)
{
printf("filename parameter required\n");
return -1;
}
if ((f = fopen(argv[1], "rb")) == NULL)
{
printf("failed to open file \'%s\'\n", argv[1]);
return -1;
}
sz = fsize(f);
// printf("file size: %ld\n", sz);
if (sz > 0)
{
char buf[256];
fseek(f, sz, SEEK_SET);
while (fgetsr(buf, sizeof(buf), f) != NULL)
printf("%s", buf);
}
fclose(f);
return 0;
}
I've only tested this on windows with 2 different compilers.
You could just pipe the input through the program tac
, which is like cat
but backwards!
There are quite a few ways you could do this, but reading a byte at a time is definitely one of poorer choices.
Reading the last, say, 4KB and then walking back up from the last character to the previous newline would be my choice.
Another option is to mmap
the file, and just pretend that the file is a lump of memory, and scan backwards in that. [You can tell mmap
you are reading backwards too, to make it prefetch data for you].
If the file is VERY large (several gigabytes), you may want to only use a small portion of the file in mmap
.
If you want to learn how to do it, here's a Debian/Ubuntu example (for other like RPM based distros, adapt as needed):
~$ which tac
/usr/bin/tac
~$ dpkg -S /usr/bin/tac
coreutils: /usr/bin/tac
~$ mkdir srcs
~$ cd srcs
~/srcs$ apt-get source coreutils
(clip apt-get output)
~/srcs$ ls
coreutils-8.13 coreutils_8.13-3.2ubuntu2.1.diff.gz coreutils_8.13-3.2ubuntu2.1.dsc coreutils_8.13.orig.tar.gz
~/srcs$ cd coreutils-8.13/
~/srcs/coreutils-8.13$ find . -name tac.c
./src/tac.c
~/srcs/coreutils-8.13$ less src/tac.c
That's not too long, a bit over 600 lines, and while it packs some advanced features, and uses functions from other sources, the reverse line buffering implementation seems to be in that tac.c
source file.
FSEEKing for every byte sounds PAINFULLY slow.
If you've got the memory, just read the entire file into memory and either reverse it or scan it backwards.
Another option would be Windows memory mapped files.
来源:https://stackoverflow.com/questions/14834267/reading-a-text-file-backwards-in-c