I have a bunch of files, and every file has a header of 5 lines. In the rest of each file, every pair of lines forms an entry. I need to randomly select entries from these files. How can I do this?
The answer is in Python, assuming you can read a whole file into memory.
#using python 2.6
import sys
import os
import itertools
import random
def main(directory, num_files=5, num_entries=5):
    """Print randomly chosen entries from randomly chosen files in a directory.

    directory   -- path of the directory that holds the data files
    num_files   -- how many files to sample (capped at the number available)
    num_entries -- how many entries to request from each sampled file
    """
    # os.listdir() yields bare names; join them with the directory so the
    # files can be opened regardless of the current working directory.
    candidates = [os.path.join(directory, name)
                  for name in os.listdir(directory)]
    # sub-directories cannot be opened as data files, so leave them out
    file_paths = [path for path in candidates if os.path.isfile(path)]
    # random.sample() raises ValueError when asked for more items than the
    # population holds, so never request more files than actually exist
    chosen_paths = random.sample(file_paths, min(num_files, len(file_paths)))
    for path in chosen_paths:
        chosen_entries = get_random_entries(path, num_entries)
        for entry in chosen_entries:
            # do something with your chosen entries
            # (single-argument print() behaves the same on Python 2 and 3)
            print(entry)
def get_random_entries(file_path, num_entries, header_lines=5):
    """Return a random sample of entries read from one file.

    The first ``header_lines`` lines of the file are skipped; every following
    pair of lines forms one entry, returned as a ``(first, second)`` tuple
    with line endings kept. If the body has an odd number of lines, the last
    entry is ``(line, None)``.

    file_path    -- path of the file to read (read entirely into memory)
    num_entries  -- number of entries wanted; when the file holds fewer,
                    all of its entries are returned in random order
    header_lines -- number of leading lines to discard (default 5)

    Returns a list of entry tuples without repetition.
    """
    with open(file_path, 'r') as handle:
        # read the lines and slice off the header
        lines = handle.readlines()[header_lines:]
    # group the lines into pairs (i.e. entries); index-based pairing works on
    # both Python 2 and 3 and pads a dangling final line with None
    entries = [(lines[i], lines[i + 1] if i + 1 < len(lines) else None)
               for i in range(0, len(lines), 2)]
    # random.sample() raises ValueError if the sample is larger than the
    # population, so cap the request at what the file actually contains
    return random.sample(entries, min(num_entries, len(entries)))
if __name__ == '__main__':
    # use optparse here to do fancy things with the command line args
    if len(sys.argv) < 2:
        sys.exit('usage: %s DIRECTORY [NUM_FILES [NUM_ENTRIES]]' % sys.argv[0])
    # main() expects the directory as a string, not the whole argument list;
    # the optional counts arrive as text and must be converted to integers
    main(sys.argv[1], *[int(arg) for arg in sys.argv[2:4]])
Links: itertools, random, optparse