Spark program for finding popular hashtags from Twitter

ぐ巨炮叔叔 提交于 2019-12-24 19:04:47

问题


I am trying to run this Spark program, which should fetch the hashtags that are currently popular on Twitter and show only the top 10 of them.
I have supplied the Twitter access token and access token secret, as well as the consumer key and consumer secret, via a text file.

import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.twitter.TwitterUtils

/** Streams tweets through Spark Streaming and prints hashtag counts over a
  * sliding five-minute window, most frequent first.
  */
object PopularHashtags {

  /** Raises the root log4j logger threshold to ERROR so that only errors are logged. */
  def setupLogging(): Unit = {
    import org.apache.log4j.{ Level, Logger }
    Logger.getRootLogger().setLevel(Level.ERROR)
  }

  /** Loads Twitter OAuth settings from `C:\twitter.txt` into JVM system properties.
    *
    * Each line that splits into exactly two space-separated fields, `name value`,
    * is stored as the system property `twitter4j.oauth.<name>`. Lines with any
    * other number of fields are ignored.
    */
  def setupTwitter(): Unit = {
    import scala.io.Source

    val source = Source.fromFile("C:\\twitter.txt")
    // Close the file handle even if reading or property assignment fails.
    try {
      for (line <- source.getLines()) {
        val fields = line.split(" ")
        if (fields.length == 2) {
          System.setProperty(s"twitter4j.oauth.${fields(0)}", fields(1))
        }
      }
    } finally {
      source.close()
    }
  }

  /** Entry point: configures credentials and logging, builds the streaming
    * pipeline, then runs until the streaming context terminates.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {

    // Credentials are set as system properties before the Twitter stream is created.
    setupTwitter()
    val ssc = new StreamingContext("local[2]", "PopularHashtags", Seconds(1))
    setupLogging()

    val tweets = TwitterUtils.createStream(ssc, None)
    val statuses = tweets.map(status => status.getText())

    // Split each tweet on single spaces and keep only the tokens starting with '#'.
    val tweetwords = statuses.flatMap(tweetText => tweetText.split(" "))
    val hashtags = tweetwords.filter(word => word.startsWith("#"))
    val hashtagKeyValues = hashtags.map(hashtag => (hashtag, 1))

    // Count per hashtag over a 300-second window that slides every second.
    // The second function subtracts the counts of batches leaving the window.
    val hashtagCounts = hashtagKeyValues.reduceByKeyAndWindow(
      (x, y) => x + y,
      (x, y) => x - y,
      Seconds(300),
      Seconds(1))

    // Order each batch by count, highest first.
    val sortedResults = hashtagCounts.transform(rdd => rdd.sortBy(x => x._2, ascending = false))
    sortedResults.print()

    // Checkpoint directory for the streaming context; set before the context starts.
    ssc.checkpoint("C:/checkpoint/")
    ssc.start()
    ssc.awaitTermination()
  }
}

The error I am getting is shown below. After the error, a new timestamp is printed every second and this continues forever, but no data is ever shown. Can anyone please help me understand what this error actually means?

OUTPUT :

2018-04-10 01:04:00 INFO  SparkContext:54 - Running Spark version 2.3.0
2018-04-10 01:04:00 WARN  NativeCodeLoader:62 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2018-04-10 01:04:00 INFO  SparkContext:54 - Submitted application: PopularHashtags
2018-04-10 01:04:00 INFO  SecurityManager:54 - Changing view acls to: Anantanuj
2018-04-10 01:04:00 INFO  SecurityManager:54 - Changing modify acls to: Anantanuj
2018-04-10 01:04:00 INFO  SecurityManager:54 - Changing view acls groups to: 
2018-04-10 01:04:00 INFO  SecurityManager:54 - Changing modify acls groups to: 
2018-04-10 01:04:00 INFO  SecurityManager:54 - SecurityManager: authentication disabled; ui acls disabled; users  with view permissions: Set(Anantanuj); groups with view permissions: Set(); users  with modify permissions: Set(Anantanuj); groups with modify permissions: Set()
2018-04-10 01:04:01 INFO  Utils:54 - Successfully started service 'sparkDriver' on port 56843.
2018-04-10 01:04:01 INFO  SparkEnv:54 - Registering MapOutputTracker
2018-04-10 01:04:01 INFO  SparkEnv:54 - Registering BlockManagerMaster
2018-04-10 01:04:01 INFO  BlockManagerMasterEndpoint:54 - Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information
2018-04-10 01:04:01 INFO  BlockManagerMasterEndpoint:54 - BlockManagerMasterEndpoint up
2018-04-10 01:04:01 INFO  DiskBlockManager:54 - Created local directory at C:\Users\Anantanuj\AppData\Local\Temp\blockmgr-cd6b548a-5acb-44e9-adf5-7e13f458d900
2018-04-10 01:04:01 INFO  MemoryStore:54 - MemoryStore started with capacity 904.8 MB
2018-04-10 01:04:01 INFO  SparkEnv:54 - Registering OutputCommitCoordinator
2018-04-10 01:04:02 INFO  log:192 - Logging initialized @7536ms
2018-04-10 01:04:02 INFO  Server:346 - jetty-9.3.z-SNAPSHOT
2018-04-10 01:04:02 INFO  Server:414 - Started @7634ms
2018-04-10 01:04:02 INFO  AbstractConnector:278 - Started ServerConnector@1f3f02ee{HTTP/1.1,[http/1.1]}{0.0.0.0:4040}
2018-04-10 01:04:02 INFO  Utils:54 - Successfully started service 'SparkUI' on port 4040.
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@71c27ee8{/jobs,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@1a6c1270{/jobs/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@18a136ac{/jobs/job,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@6dee4f1b{/jobs/job/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@176b3f44{/stages,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@6ee6f53{/stages/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@421bba99{/stages/stage,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@66ac5762{/stages/stage/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@797cf65c{/stages/pool,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@31bcf236{/stages/pool/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@4b3ed2f0{/storage,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@4fad9bb2{/storage/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@517d4a0d{/storage/rdd,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@7862f56{/storage/rdd/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@3a12c404{/environment,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@1941a8ff{/environment/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@22d7b4f8{/executors,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@149dd36b{/executors/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@38831718{/executors/threadDump,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@2c1156a7{/executors/threadDump/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@33fe57a9{/static,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@142269f2{/,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@331acdad{/api,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@6aa3a905{/jobs/job/kill,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@a22cb6a{/stages/stage/kill,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO  SparkUI:54 - Bound SparkUI to 0.0.0.0, and started at http://DESKTOP-AEDI0R6:4040
2018-04-10 01:04:02 INFO  Executor:54 - Starting executor ID driver on host localhost
2018-04-10 01:04:02 INFO  Utils:54 - Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 56856.
2018-04-10 01:04:02 INFO  NettyBlockTransferService:54 - Server created on DESKTOP-AEDI0R6:56856
2018-04-10 01:04:02 INFO  BlockManager:54 - Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy
2018-04-10 01:04:02 INFO  BlockManagerMaster:54 - Registering BlockManager BlockManagerId(driver, DESKTOP-AEDI0R6, 56856, None)
2018-04-10 01:04:02 INFO  BlockManagerMasterEndpoint:54 - Registering block manager DESKTOP-AEDI0R6:56856 with 904.8 MB RAM, BlockManagerId(driver, DESKTOP-AEDI0R6, 56856, None)
2018-04-10 01:04:02 INFO  BlockManagerMaster:54 - Registered BlockManager BlockManagerId(driver, DESKTOP-AEDI0R6, 56856, None)
2018-04-10 01:04:02 INFO  BlockManager:54 - Initialized BlockManager: BlockManagerId(driver, DESKTOP-AEDI0R6, 56856, None)
2018-04-10 01:04:02 INFO  ContextHandler:781 - Started o.s.j.s.ServletContextHandler@2dbf4cbd{/metrics/json,null,AVAILABLE,@Spark}

ERROR:

Exception in thread "receiver-supervisor-future-0" java.lang.AbstractMethodError
        at org.apache.spark.internal.Logging$class.initializeLogIfNecessary(Logging.scala:99)
        at org.apache.spark.streaming.twitter.TwitterReceiver.initializeLogIfNecessary(TwitterInputDStream.scala:60)
        at org.apache.spark.internal.Logging$class.log(Logging.scala:46)
        at org.apache.spark.streaming.twitter.TwitterReceiver.log(TwitterInputDStream.scala:60)
        at org.apache.spark.internal.Logging$class.logInfo(Logging.scala:54)
        at org.apache.spark.streaming.twitter.TwitterReceiver.logInfo(TwitterInputDStream.scala:60)
        at org.apache.spark.streaming.twitter.TwitterReceiver.onStop(TwitterInputDStream.scala:106)
        at org.apache.spark.streaming.receiver.ReceiverSupervisor.stopReceiver(ReceiverSupervisor.scala:170)
        at org.apache.spark.streaming.receiver.ReceiverSupervisor$$anonfun$restartReceiver$1.apply$mcV$sp(ReceiverSupervisor.scala:194)
        at org.apache.spark.streaming.receiver.ReceiverSupervisor$$anonfun$restartReceiver$1.apply(ReceiverSupervisor.scala:189)
        at org.apache.spark.streaming.receiver.ReceiverSupervisor$$anonfun$restartReceiver$1.apply(ReceiverSupervisor.scala:189)
        at scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
        at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
        at java.lang.Thread.run(Unknown Source)
    -------------------------------------------
    Time: 1523302444000 ms
    -------------------------------------------

    -------------------------------------------
    Time: 1523302445000 ms
    -------------------------------------------

    -------------------------------------------
    Time: 1523302446000 ms
    -------------------------------------------

来源:https://stackoverflow.com/questions/49740964/spark-program-finding-popular-hashtags-from-twiiter

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!