Question
I am trying to run this Spark program, which fetches the currently popular hashtags on Twitter and shows only the top 10.
I have supplied the Twitter Access Token and Secret, and the Consumer Key and Secret, via a text file.
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.twitter.TwitterUtils

object PopularHashtags {

  // Reduce log noise so the streaming output stays readable.
  def setupLogging() = {
    import org.apache.log4j.{ Level, Logger }
    val rootLogger = Logger.getRootLogger()
    rootLogger.setLevel(Level.ERROR)
  }

  // Read Twitter credentials from a text file and expose them as twitter4j system properties.
  def setupTwitter() = {
    import scala.io.Source
    for (line <- Source.fromFile("C:\\twitter.txt").getLines) {
      val fields = line.split(" ")
      if (fields.length == 2) {
        System.setProperty("twitter4j.oauth." + fields(0), fields(1))
      }
    }
  }

  def main(args: Array[String]) {
    setupTwitter()

    // Local streaming context with two threads and a 1-second batch interval.
    val ssc = new StreamingContext("local[2]", "PopularHashtags", Seconds(1))
    setupLogging()

    // DStream of statuses from the Twitter sample stream.
    val tweets = TwitterUtils.createStream(ssc, None)
    val statuses = tweets.map(status => status.getText())

    // Split each tweet into words and keep only the hashtags.
    val tweetwords = statuses.flatMap(tweetText => tweetText.split(" "))
    val hashtags = tweetwords.filter(word => word.startsWith("#"))

    // Count hashtags over a sliding 5-minute window, recomputed every second.
    val hashtagKeyValues = hashtags.map(hashtag => (hashtag, 1))
    val hashtagCounts = hashtagKeyValues.reduceByKeyAndWindow(
      (x, y) => x + y, (x, y) => x - y, Seconds(300), Seconds(1))

    // Sort by count descending; print() shows the top 10 entries of each batch.
    val sortedResults = hashtagCounts.transform(rdd => rdd.sortBy(x => x._2, false))
    sortedResults.print()

    // Checkpointing is required when reduceByKeyAndWindow uses an inverse function.
    ssc.checkpoint("C:/checkpoint/")
    ssc.start()
    ssc.awaitTermination()
  }
}
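For reference, setupTwitter() expects C:\twitter.txt to contain one twitter4j OAuth property per line, as a space-separated name/value pair. A sketch of the expected file contents, with placeholder values instead of my real credentials, would be:

consumerKey XXXXXXXXXXXXXXXXXXXX
consumerSecret XXXXXXXXXXXXXXXXXXXX
accessToken XXXXXXXXXXXXXXXXXXXX
accessTokenSecret XXXXXXXXXXXXXXXXXXXX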
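Also for context, TwitterUtils is not part of core Spark; it comes from the separate spark-streaming-twitter connector, which has to be on the classpath. A minimal sbt sketch of how such a dependency might be declared is below; the artifact coordinates and version numbers are assumptions for illustration, not necessarily what my build uses:

// build.sbt (sketch; coordinates and versions are assumptions, adjust to your Spark version)
scalaVersion := "2.11.12"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"      % "2.3.0",
  "org.apache.spark" %% "spark-streaming" % "2.3.0",
  // Twitter connector maintained in Apache Bahir for Spark 2.x
  "org.apache.bahir" %% "spark-streaming-twitter" % "2.3.0"
)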
The error I am getting is shown below. After the error, the batch time keeps printing every second and carries on forever, but no data is ever shown. Can anyone please help me understand what this error actually means?
OUTPUT:
2018-04-10 01:04:00 INFO SparkContext:54 - Running Spark version 2.3.0
2018-04-10 01:04:00 WARN NativeCodeLoader:62 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2018-04-10 01:04:00 INFO SparkContext:54 - Submitted application: PopularHashtags
2018-04-10 01:04:00 INFO SecurityManager:54 - Changing view acls to: Anantanuj
2018-04-10 01:04:00 INFO SecurityManager:54 - Changing modify acls to: Anantanuj
2018-04-10 01:04:00 INFO SecurityManager:54 - Changing view acls groups to:
2018-04-10 01:04:00 INFO SecurityManager:54 - Changing modify acls groups to:
2018-04-10 01:04:00 INFO SecurityManager:54 - SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(Anantanuj); groups with view permissions: Set(); users with modify permissions: Set(Anantanuj); groups with modify permissions: Set()
2018-04-10 01:04:01 INFO Utils:54 - Successfully started service 'sparkDriver' on port 56843.
2018-04-10 01:04:01 INFO SparkEnv:54 - Registering MapOutputTracker
2018-04-10 01:04:01 INFO SparkEnv:54 - Registering BlockManagerMaster
2018-04-10 01:04:01 INFO BlockManagerMasterEndpoint:54 - Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information
2018-04-10 01:04:01 INFO BlockManagerMasterEndpoint:54 - BlockManagerMasterEndpoint up
2018-04-10 01:04:01 INFO DiskBlockManager:54 - Created local directory at C:\Users\Anantanuj\AppData\Local\Temp\blockmgr-cd6b548a-5acb-44e9-adf5-7e13f458d900
2018-04-10 01:04:01 INFO MemoryStore:54 - MemoryStore started with capacity 904.8 MB
2018-04-10 01:04:01 INFO SparkEnv:54 - Registering OutputCommitCoordinator
2018-04-10 01:04:02 INFO log:192 - Logging initialized @7536ms
2018-04-10 01:04:02 INFO Server:346 - jetty-9.3.z-SNAPSHOT
2018-04-10 01:04:02 INFO Server:414 - Started @7634ms
2018-04-10 01:04:02 INFO AbstractConnector:278 - Started ServerConnector@1f3f02ee{HTTP/1.1,[http/1.1]}{0.0.0.0:4040}
2018-04-10 01:04:02 INFO Utils:54 - Successfully started service 'SparkUI' on port 4040.
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@71c27ee8{/jobs,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@1a6c1270{/jobs/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@18a136ac{/jobs/job,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@6dee4f1b{/jobs/job/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@176b3f44{/stages,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@6ee6f53{/stages/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@421bba99{/stages/stage,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@66ac5762{/stages/stage/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@797cf65c{/stages/pool,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@31bcf236{/stages/pool/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@4b3ed2f0{/storage,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@4fad9bb2{/storage/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@517d4a0d{/storage/rdd,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@7862f56{/storage/rdd/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@3a12c404{/environment,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@1941a8ff{/environment/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@22d7b4f8{/executors,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@149dd36b{/executors/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@38831718{/executors/threadDump,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@2c1156a7{/executors/threadDump/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@33fe57a9{/static,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@142269f2{/,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@331acdad{/api,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@6aa3a905{/jobs/job/kill,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@a22cb6a{/stages/stage/kill,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO SparkUI:54 - Bound SparkUI to 0.0.0.0, and started at http://DESKTOP-AEDI0R6:4040
2018-04-10 01:04:02 INFO Executor:54 - Starting executor ID driver on host localhost
2018-04-10 01:04:02 INFO Utils:54 - Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 56856.
2018-04-10 01:04:02 INFO NettyBlockTransferService:54 - Server created on DESKTOP-AEDI0R6:56856
2018-04-10 01:04:02 INFO BlockManager:54 - Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy
2018-04-10 01:04:02 INFO BlockManagerMaster:54 - Registering BlockManager BlockManagerId(driver, DESKTOP-AEDI0R6, 56856, None)
2018-04-10 01:04:02 INFO BlockManagerMasterEndpoint:54 - Registering block manager DESKTOP-AEDI0R6:56856 with 904.8 MB RAM, BlockManagerId(driver, DESKTOP-AEDI0R6, 56856, None)
2018-04-10 01:04:02 INFO BlockManagerMaster:54 - Registered BlockManager BlockManagerId(driver, DESKTOP-AEDI0R6, 56856, None)
2018-04-10 01:04:02 INFO BlockManager:54 - Initialized BlockManager: BlockManagerId(driver, DESKTOP-AEDI0R6, 56856, None)
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@2dbf4cbd{/metrics/json,null,AVAILABLE,@Spark}
ERROR:
Exception in thread "receiver-supervisor-future-0" java.lang.AbstractMethodError
at org.apache.spark.internal.Logging$class.initializeLogIfNecessary(Logging.scala:99)
at org.apache.spark.streaming.twitter.TwitterReceiver.initializeLogIfNecessary(TwitterInputDStream.scala:60)
at org.apache.spark.internal.Logging$class.log(Logging.scala:46)
at org.apache.spark.streaming.twitter.TwitterReceiver.log(TwitterInputDStream.scala:60)
at org.apache.spark.internal.Logging$class.logInfo(Logging.scala:54)
at org.apache.spark.streaming.twitter.TwitterReceiver.logInfo(TwitterInputDStream.scala:60)
at org.apache.spark.streaming.twitter.TwitterReceiver.onStop(TwitterInputDStream.scala:106)
at org.apache.spark.streaming.receiver.ReceiverSupervisor.stopReceiver(ReceiverSupervisor.scala:170)
at org.apache.spark.streaming.receiver.ReceiverSupervisor$$anonfun$restartReceiver$1.apply$mcV$sp(ReceiverSupervisor.scala:194)
at org.apache.spark.streaming.receiver.ReceiverSupervisor$$anonfun$restartReceiver$1.apply(ReceiverSupervisor.scala:189)
at org.apache.spark.streaming.receiver.ReceiverSupervisor$$anonfun$restartReceiver$1.apply(ReceiverSupervisor.scala:189)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)
-------------------------------------------
Time: 1523302444000 ms
-------------------------------------------
-------------------------------------------
Time: 1523302445000 ms
-------------------------------------------
-------------------------------------------
Time: 1523302446000 ms
-------------------------------------------
Source: https://stackoverflow.com/questions/49740964/spark-program-finding-popular-hashtags-from-twiiter