Multithreaded file processing with .NET

后端 未结 6 534
栀梦
栀梦 2021-01-30 15:21

There is a folder that contains 1000s of small text files. I aim to parse and process all of them while more files are being populated into the folder. My intention is to multit

6条回答
  •  说谎
    说谎 (楼主)
    2021-01-30 15:50

    I recommend that you queue a thread for each file and keep track of the running threads in a dictionary, launching a new thread whenever a thread completes, up to a maximum limit. I prefer to create my own threads when they can be long-running, and to use callbacks to signal when they are done or have encountered an exception. In the sample below I use a dictionary to keep track of the running worker instances. This way I can call into an instance if I want to stop work early. Callbacks can also be used to update a UI with progress and throughput. You can also dynamically throttle the running thread limit for added points.

    The example code is an abbreviated demonstrator, but it does run.

    // Console entry point for the demonstrator.
    class Program
    {
        // Builds the supervisor, starts the first batch of workers, then polls
        // until the supervisor reports that nothing is waiting or running.
        static void Main(string[] args)
        {
            Supervisor supervisor = new Supervisor();
            supervisor.LaunchWaitingThreads();

            // Poll every 200 ms; each finished worker asks the supervisor to
            // launch more threads, so the main thread only has to wait.
            for (;;)
            {
                if (supervisor.Done)
                {
                    break;
                }
                Thread.Sleep(200);
            }

            Console.WriteLine("\nDone");
            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
    }
    
    // Callback signature used to report that a worker has started:
    // idArg is an integer id for the worker's thread, workerArg is the worker instance itself.
    public delegate void StartCallbackDelegate(int idArg, Worker workerArg);
    // Callback signature used to report that a worker has finished:
    // idArg is the same integer id that was passed to the start callback.
    public delegate void DoneCallbackDelegate(int idArg);
    
    /// <summary>
    /// Creates one (not yet started) thread per file in a folder and runs them
    /// with at most <c>maxThreads</c> started at any time. Workers report back
    /// through <see cref="WorkerStart"/> and <see cref="WorkerDone"/>.
    /// </summary>
    public class Supervisor
    {
        // Threads that have been created but not started yet, in start order.
        readonly Queue<Thread> waitingThreads = new Queue<Thread>();
        // Started threads keyed by ManagedThreadId. The value is null between
        // Thread.Start() and the worker's WorkerStart callback.
        readonly Dictionary<int, Worker> runningThreads = new Dictionary<int, Worker>();
        // Upper bound on entries in runningThreads; left mutable so it can be throttled.
        int maxThreads = 20;
        // Guards waitingThreads and runningThreads.
        readonly object locker = new object();

        /// <summary>
        /// True when no thread is waiting to start and no thread is registered as running.
        /// </summary>
        public bool Done
        {
            get
            {
                lock (locker)
                {
                    return (waitingThreads.Count == 0) && (runningThreads.Count == 0);
                }
            }
        }

        /// <summary>
        /// Queues a thread for each file in the default folder <c>C:\folder</c>.
        /// </summary>
        public Supervisor() : this("C:\\folder")
        {
        }

        /// <summary>
        /// Queues a thread for each file currently in <paramref name="folderArg"/>.
        /// Nothing is started until <see cref="LaunchWaitingThreads"/> is called.
        /// </summary>
        /// <param name="folderArg">Folder whose files are enumerated once, at construction time.</param>
        public Supervisor(string folderArg)
        {
            // queue up a thread for each file
            foreach (string fileName in Directory.GetFiles(folderArg))
            {
                waitingThreads.Enqueue(CreateThread(fileName));
            }
        }

        // Builds a background thread whose entry point is a new Worker's
        // ProcessFile, wired to this supervisor's start/done callbacks.
        Thread CreateThread(string fileNameArg)
        {
            Worker worker = new Worker(fileNameArg, WorkerStart, WorkerDone);
            Thread thread = new Thread(worker.ProcessFile);
            thread.IsBackground = true;
            return thread;
        }

        /// <summary>
        /// Called when a worker starts: replaces the placeholder entry for the
        /// thread id with the actual worker instance.
        /// </summary>
        public void WorkerStart(int threadIdArg, Worker workerArg)
        {
            lock (locker)
            {
                // update with worker instance
                runningThreads[threadIdArg] = workerArg;
            }
        }

        /// <summary>
        /// Called when a worker finishes: unregisters the thread id, logs it,
        /// and starts waiting threads to fill the freed slot.
        /// </summary>
        public void WorkerDone(int threadIdArg)
        {
            lock (locker)
            {
                runningThreads.Remove(threadIdArg);
            }
            Console.WriteLine(string.Format("  Thread {0} done", threadIdArg.ToString()));
            LaunchWaitingThreads();
        }

        /// <summary>
        /// Starts queued threads until <c>maxThreads</c> are registered as
        /// running or the queue is empty.
        /// </summary>
        public void LaunchWaitingThreads()
        {
            lock (locker)
            {
                while ((runningThreads.Count < maxThreads) && (waitingThreads.Count > 0))
                {
                    Thread thread = waitingThreads.Dequeue();
                    // Register a placeholder before starting so the count is
                    // accurate even if the worker has not called WorkerStart yet.
                    runningThreads.Add(thread.ManagedThreadId, null);
                    thread.Start();
                }
            }
        }
    }
    
    /// <summary>
    /// Reads one file on the thread that calls <see cref="ProcessFile"/> and
    /// reports start and completion through the supplied callbacks.
    /// </summary>
    public class Worker
    {
        readonly string fileName;
        readonly StartCallbackDelegate startCallback;
        readonly DoneCallbackDelegate doneCallback;

        /// <param name="fileNameArg">Path of the file to read.</param>
        /// <param name="startCallbackArg">Invoked with the current thread id and this worker when processing begins.</param>
        /// <param name="doneCallbackArg">Invoked with the current thread id when processing ends.</param>
        public Worker(string fileNameArg, StartCallbackDelegate startCallbackArg, DoneCallbackDelegate doneCallbackArg)
        {
            fileName = fileNameArg;
            startCallback = startCallbackArg;
            doneCallback = doneCallbackArg;
        }

        /// <summary>
        /// Thread entry point: signals start, reads the whole file, and always
        /// signals completion, including when the read fails.
        /// </summary>
        public void ProcessFile()
        {
            int threadId = Thread.CurrentThread.ManagedThreadId;
            try
            {
                startCallback(threadId, this);
                Console.WriteLine(string.Format("Reading file {0} on thread {1}", fileName, threadId.ToString()));
                File.ReadAllBytes(fileName);
            }
            catch (IOException ex)
            {
                // Covers missing, locked or partially written files; letting
                // this escape would be an unhandled exception on this thread.
                ReportFailure(threadId, ex);
            }
            catch (UnauthorizedAccessException ex)
            {
                ReportFailure(threadId, ex);
            }
            finally
            {
                // Must run on every path, otherwise whoever tracks this worker
                // through the callbacks never learns that it has ended.
                doneCallback(threadId);
            }
        }

        // Writes a one-line description of a failed read to standard error.
        void ReportFailure(int threadIdArg, Exception exceptionArg)
        {
            Console.Error.WriteLine(string.Format("Failed to read file {0} on thread {1}: {2}", fileName, threadIdArg.ToString(), exceptionArg.Message));
        }
    }
    

提交回复
热议问题