I\'m writing a log file viewer for a web application and for that I want to paginate through the lines of the log file. The items in the file are line based with the newest
Here is my answer. Pure python. Using timeit it seems pretty fast. Tailing 100 lines of a log file that has 100,000 lines:
>>> timeit.timeit('tail.tail(f, 100, 4098)', 'import tail; f = open("log.txt", "r");', number=10)
0.0014600753784179688
>>> timeit.timeit('tail.tail(f, 100, 4098)', 'import tail; f = open("log.txt", "r");', number=100)
0.00899195671081543
>>> timeit.timeit('tail.tail(f, 100, 4098)', 'import tail; f = open("log.txt", "r");', number=1000)
0.05842900276184082
>>> timeit.timeit('tail.tail(f, 100, 4098)', 'import tail; f = open("log.txt", "r");', number=10000)
0.5394978523254395
>>> timeit.timeit('tail.tail(f, 100, 4098)', 'import tail; f = open("log.txt", "r");', number=100000)
5.377126932144165
Here is the code:
import os
def tail(f, lines=1, _buffer=4098):
"""Tail a file and get X lines from the end"""
# place holder for the lines found
lines_found = []
# block counter will be multiplied by buffer
# to get the block size from the end
block_counter = -1
# loop until we find X lines
while len(lines_found) < lines:
try:
f.seek(block_counter * _buffer, os.SEEK_END)
except IOError: # either file is too small, or too many lines requested
f.seek(0)
lines_found = f.readlines()
break
lines_found = f.readlines()
# we found enough lines, get out
# Removed this line because it was redundant the while will catch
# it, I left it for history
# if len(lines_found) > lines:
# break
# decrement the block counter to get the
# next X bytes
block_counter -= 1
return lines_found[-lines:]
Here is a pretty simple implementation:
with open('/etc/passwd', 'r') as f:
try:
f.seek(0,2)
s = ''
while s.count('\n') < 11:
cur = f.tell()
f.seek((cur - 10))
s = f.read(10) + s
f.seek((cur - 10))
print s
except Exception as e:
f.readlines()
This is my version of tailf
import sys, time, os
filename = 'path to file'
try:
with open(filename) as f:
size = os.path.getsize(filename)
if size < 1024:
s = size
else:
s = 999
f.seek(-s, 2)
l = f.read()
print l
while True:
line = f.readline()
if not line:
time.sleep(1)
continue
print line
except IOError:
pass
There is very useful module that can do this:
from file_read_backwards import FileReadBackwards
with FileReadBackwards("/tmp/file", encoding="utf-8") as frb:
# getting lines by lines starting from the last line up
for l in frb:
print(l)
The simplest way is to use deque
:
from collections import deque
def tail(filename, n=10):
with open(filename) as f:
return deque(f, n)
based on S.Lott's top voted answer (Sep 25 '08 at 21:43), but fixed for small files.
def tail(the_file, lines_2find=20):
the_file.seek(0, 2) #go to end of file
bytes_in_file = the_file.tell()
lines_found, total_bytes_scanned = 0, 0
while lines_2find+1 > lines_found and bytes_in_file > total_bytes_scanned:
byte_block = min(1024, bytes_in_file-total_bytes_scanned)
the_file.seek(-(byte_block+total_bytes_scanned), 2)
total_bytes_scanned += byte_block
lines_found += the_file.read(1024).count('\n')
the_file.seek(-total_bytes_scanned, 2)
line_list = list(the_file.readlines())
return line_list[-lines_2find:]
#we read at least 21 line breaks from the bottom, block by block for speed
#21 to ensure we don't get a half line
Hope this is useful.