Map Reduce with F# agents

后端 未结 1 580
攒了一身酷
攒了一身酷 2020-12-01 16:45

After playing with F# agents I tried to do a map reduce using them.

The basic structure I use is:

  • map supervisor which queues up all the work to do i
相关标签:
1条回答
  • 2020-12-01 17:13

    I like to use MailboxProcessor for the reduce part of the algorithm, and an async block that's invoked with Async.Parallel for the map part. It makes things more explicit, giving you finer control over exception handling, timeouts, and cancellation.

    The following code was designed with Brian's help, and with the help of his excellent F# block highlighting "F# Depth Colorizer" plug-in for VS2010.

    This code is meant to pull RSS feeds from the Yahoo weather server in a map-reduce pattern. It demonstrates how we can control execution flow from outside the actual algorithm.

    fetchWeather is the map part, and mailboxLoop is the reduce part of the algorithm.

    #r "System.Xml.Linq.dll"
    
    #r "FSharp.PowerPack.dll"
    
    open System
    open System.Diagnostics
    open System.IO
    open System.Linq
    open System.Net
    open System.Xml.Linq
    
    open Microsoft.FSharp.Control.WebExtensions 
    
    /// One weather reading: a city, its region, and a temperature.
    /// ToString renders the temperature with an " F" suffix.
    type Weather (city : string, region : string, temperature : int) =
       /// City name as supplied to the constructor.
       member this.City = city

       /// Region as supplied to the constructor.
       member this.Region = region

       /// Temperature as supplied to the constructor.
       member this.Temperature = temperature

       /// Formats the reading as "<city>, <region>: <temperature> F".
       override this.ToString() =
          sprintf "%s, %s: %d F" city region temperature
    
    /// Messages accepted by the agent started in mailboxLoop.
    type MessageForActor =
       /// A reading that was fetched and parsed successfully.
       | ProcessWeather of Weather
       /// A feed that yielded no reading; parseRss passes the woeid it was given.
       | ProcessError of int
       /// Request for the accumulated results; the reply is a triple of two
       /// Weather values and a Weather list.
       | GetResults of AsyncReplyChannel<Weather * Weather * Weather list>
    
    /// Parses one weather RSS document read from `rssStream` into a message for
    /// the reducing agent.
    ///
    /// Returns `ProcessWeather` when the feed carries a city, a region and an
    /// integer temperature. Returns `ProcessError woeid` when any of those is
    /// absent or the temperature is not an integer, so a single malformed feed
    /// is reported as an error entry instead of raising. Exceptions thrown by
    /// `XDocument.Load` itself (e.g. the stream is not XML) are not caught here.
    let parseRss woeid (rssStream : Stream) =
       let xn str = XName.Get str
       let yweather elementName = XName.Get(elementName, "http://xml.weather.yahoo.com/ns/rss/1.0")

       // Child element lookup that tolerates a missing parent:
       // XContainer.Element returns null when there is no such child.
       let childOf (name : XName) (parent : XElement) =
          match parent with
          | null -> null
          | _    -> parent.Element(name)

       // Attribute value lookup that tolerates a missing element or attribute.
       let attributeValue (name : XName) (element : XElement) =
          match element with
          | null -> null
          | _ ->
             match element.Attribute(name) with
             | null      -> null
             | attribute -> attribute.Value

       // FirstOrDefault yields null (rather than throwing) when no <channel> exists.
       let channel   = (XDocument.Load rssStream).Descendants(xn "channel").FirstOrDefault()
       let location  = channel |> childOf (yweather "location")
       let condition = channel |> childOf (xn "item") |> childOf (yweather "condition")

       let city   = location  |> attributeValue (xn "city")
       let region = location  |> attributeValue (xn "region")
       let temp   = condition |> attributeValue (xn "temp")

       //  If the RSS server returns error, condition XML element won't be available,
       //  which surfaces here as a null `temp`.
       match city, region, temp with
       | null, _, _ | _, null, _ | _, _, null -> ProcessError(woeid)
       | _ ->
          match Int32.TryParse(temp) with
          | true, temperature -> ProcessWeather(new Weather(city, region, temperature))
          | _                 -> ProcessError(woeid)
    
    /// Builds the asynchronous "map" step for one woeid: requests its RSS feed,
    /// parses the response with parseRss, and posts the resulting message to
    /// `actor`. Nothing is fetched until the returned async is run.
    let fetchWeather (actor : MessageForActor MailboxProcessor) woeid =
       let feedAddress = sprintf "http://weather.yahooapis.com/forecastrss?w=%d&u=f" woeid
       async {
          let request = WebRequest.Create feedAddress
          // Both the response and its stream are disposed when the block ends.
          use! response = request.AsyncGetResponse()
          use body = response.GetResponseStream()
          // Inserting `do! Async.Sleep 1000` before the post exaggerates the
          // timing enough to make the concurrent flow visible.
          actor.Post(parseRss woeid body)
       }
    
    /// Starts the "reduce" agent. It folds incoming messages into a running
    /// lowest-temperature reading, a running highest-temperature reading and the
    /// list of everything received, and answers a GetResults message with that
    /// triple (the list sorted by region, then city). The agent stops receiving
    /// after it has replied to GetResults.
    ///
    /// `initialCount` seeds a countdown that is decremented on every received
    /// message.
    let mailboxLoop initialCount =
       // Keeps `current` when `op current.Temperature candidate.Temperature`
       // holds; otherwise switches to `candidate`.
       let pickByTemperature op (current : Weather) (candidate : Weather) =
          match op current.Temperature candidate.Temperature with
          | true  -> current
          | false -> candidate

       // Region is the primary key and city the tie-breaker. A single stable
       // sort with this comparison gives the same order as sorting by city and
       // then stably re-sorting by region.
       let compareByRegionThenCity (a : Weather) (b : Weather) =
          match a.Region.CompareTo(b.Region) with
          | 0        -> a.City.CompareTo(b.City)
          | byRegion -> byRegion

       let agentBody (inbox : MailboxProcessor<MessageForActor>) =
          let rec loop coldest hottest collected pending =
             async {
                let! message = inbox.Receive()
                // NOTE(review): this countdown is threaded through the loop but
                // never inspected anywhere in this block.
                let pending = pending - 1

                match message with
                | GetResults replyChannel ->
                   replyChannel.Reply(coldest, hottest, List.sortWith compareByRegionThenCity collected)
                | ProcessError woeid ->
                   // Errors are listed as placeholder entries and do not affect
                   // the running coldest/hottest readings.
                   let placeholder = new Weather(sprintf "Error with woeid=%d" woeid, "ZZ", 99999)
                   return! loop coldest hottest (placeholder :: collected) pending
                | ProcessWeather weather ->
                   let nextColdest = pickByTemperature (<) coldest weather
                   let nextHottest = pickByTemperature (>) hottest weather
                   return! loop nextColdest nextHottest (weather :: collected) pending
             }

          // Sentinels: any real reading is colder than MaxValue and warmer than MinValue.
          let coldestSeed = new Weather("", "", Int32.MaxValue)
          let hottestSeed = new Weather("", "", Int32.MinValue)
          loop coldestSeed hottestSeed [] initialCount

       MailboxProcessor.Start(agentBody)
    
    /// Runs `computation` to completion on the calling thread with a 30-second
    /// timeout and discards its result. If the computation raises, the
    /// exception's message and stack trace are printed and the process exits
    /// with code -4; if the timeout elapses, a message is printed and the
    /// process exits with code -5.
    let RunSynchronouslyWithExceptionAndTimeoutHandlers computation =
       let timeout = 30000
       try
          // Async.Catch turns an exception raised by the computation into Choice2Of2.
          match Async.RunSynchronously(Async.Catch(computation), timeout) with
          | Choice1Of2 _ -> ()
          | Choice2Of2 (failure : Exception) ->
             printfn "%s" failure.Message
             printfn "%s" failure.StackTrace
             exit -4
       with
       | :? System.TimeoutException ->
          printfn "Timed out waiting for results for %d seconds!" (timeout / 1000)
          exit -5
    
    /// Script entry point, evaluated once when this binding is defined.
    /// Arguments: the script name, a run mode ("C" = concurrent, "S" = one at
    /// a time; case-insensitive), then one or more integer woeids.
    /// Exit codes used here: -1 too few arguments, -2 a woeid is not an integer,
    /// -3 unknown run mode.
    let main =
       let args = fsi.CommandLineArgs

       // Script name + run mode + at least one woeid.
       if args.Length < 3 then
          printfn "Expecting at least two arguments!"
          printfn "There were %d arguments" (args.Length - 1)
          exit -1

       let woeids =
          try
             // The first two entries are the script name and the run mode.
             [ for rawId in Seq.skip 2 args -> Int32.Parse(rawId) ]
          with
          | parseFailure ->
             printfn "One of supplied arguments was not an integer: %s" parseFailure.Message
             exit -2

       let actor = mailboxLoop woeids.Length

       let stopWatch = Stopwatch.StartNew()
       match args.[1].ToUpper() with
       | "C" ->
          printfn "Concurrent execution:  "
          // All fetches are combined into one parallel computation.
          woeids
          |> Seq.map (fetchWeather actor)
          |> Async.Parallel
          |> RunSynchronouslyWithExceptionAndTimeoutHandlers
       | "S" ->
          printfn "Synchronous execution: "
          // Each fetch is run to completion before the next one starts.
          for woeid in woeids do
             fetchWeather actor woeid
             |> RunSynchronouslyWithExceptionAndTimeoutHandlers
       | _ ->
          printfn "Unexpected run options!"
          exit -3

       let (coldest, hottest, weatherList) =
          actor.PostAndReply(fun replyChannel -> GetResults replyChannel)
       stopWatch.Stop()
       assert (weatherList.Length = woeids.Length)

       printfn "{"
       for weather in weatherList do
          printfn "   %O" weather
       printfn "}"
       printfn "Coldest place: %O" coldest
       printfn "Hottest place: %O" hottest
       printfn "Completed in %d millisec" stopWatch.ElapsedMilliseconds

    main
    
    0 讨论(0)
提交回复
热议问题