I have this code that should put an event in a queue each time an external program (TCPdump) creates a *.pcap file in my directory. My problem is that I always get an empty que
There is an excellent library which provides concurrent access to the items within that queue. The queue is also persistent[file based as well as database based], so if the program crashes, you can still consume events from the point where the program crashed.
persist-queue
This is working for me (I got the main idea from this answer, thanks!) but notice that I consider this a workaround, so if someone has a better solution to this or can better explain the reason of such behavior in Python, please do not hesitate to answer!
My guess is that I had two main problems:
- I was starting Watchdog process inside another thread (and that was blocking somehow my queue consuming thread).
- Python threading does not work really in parallel and therefore starting an independent process was necessary.
Here my code:
import time
import pyshark
import threading
import logging
import os
from queue import Queue
from multiprocessing import Process, Pool
from watchdog.observers import Observer, api
from watchdog.events import PatternMatchingEventHandler
from concurrent.futures import ThreadPoolExecutor
class Handler(PatternMatchingEventHandler):
patterns = ["*.pcap", "*.pcapng"]
def __init__(self, queue):
PatternMatchingEventHandler.__init__(self)
self.queue = queue
def process(self, event):
self.queue.put(event.src_path)
logging.info(f"Storing message: {self.queue.qsize()}")
print("Producer queue: ", list(self.queue.queue))
def on_created(self, event):
#wait that the transfer of the file is finished before processing it
file_size = -1
while file_size != os.path.getsize(event.src_path):
file_size = os.path.getsize(event.src_path)
time.sleep(1)
self.process(event)
def ConsumeQueue(consumerq):
while True:
if not consumerq.empty():
pool = Pool()
pool.apply_async(ReadPcapFiles, (consumerq.get(), ))
else:
time.sleep(1)
def ReadPcapFiles(get_event):
createdFile = get_event
print(f"This is my event in ReadPacapFile {createdFile}")
countPacket = 0
bandwidth = 0
pcapfile = pyshark.FileCapture(createdFile)
for packet in pcapfile:
countPacket +=1
bandwidth = bandwidth + int(packet.length)
print(f"Packet nr {countPacket}")
print(f"Byte per second {bandwidth}")
if __name__ == '__main__':
format = "%(asctime)s: %(message)s"
logging.basicConfig(format=format, level=logging.INFO,datefmt="%H:%M:%S")
logging.getLogger().setLevel(logging.DEBUG)
queue = Queue()
path = 'C:\\...'
worker = threading.Thread(target=ConsumeQueue, args=(queue, ), daemon=True)
print("About to start worker")
worker.start()
event_handler = Handler(queue)
observer = Observer()
observer.schedule(event_handler, path, recursive=False)
print("About to start observer")
observer.start()
try:
while True:
time.sleep(1)
except Exception as error:
observer.stop()
print("Error: " + str(error))
observer.join()