0903-页面单跳转化率统计
需求五:页面单跳转化率统计
5.1 需求概述
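计算给定的页面访问流的页面单跳转化率。例如目标页面流为 1,2,3 时:切片 1_2 的转化率 = 切片 1_2 的出现次数 / 页面 1 的访问次数;切片 2_3 的转化率 = 切片 2_3 的出现次数 / 切片 1_2 的出现次数。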
5.2 简要运行流程
5.3 具体运行流程
5.4 代码实现
5.4.1 获取用户行为数据
// 1. Fetch the user-visit actions through getOriAction, then key every action by its session id
val actionRDD: RDD[UserVisitAction] = getOriAction(sparkSession, taskParams)
val sessionId2ActionRDD: RDD[(String, UserVisitAction)] =
  actionRDD.map { visit => visit.session_id -> visit }
5.4.2 获取目标切片
// 2. Read the configured target page flow and split it into its individual page ids
val targetPageFlowStr: String = taskParams.getString(Constants.PARAM_TARGET_PAGE_FLOW)
val targetPageFlow: Array[String] = targetPageFlowStr.split(",")
// Pair every page with its successor to form the target page splits,
// e.g. Array(1_2,2_3,3_4,4_5). zip stops at the shorter side, so the last
// page is never used as the start of a split.
val targetPF: Array[String] =
  targetPageFlow.zip(targetPageFlow.tail).map { case (from, to) => from + "_" + to }
5.4.3 pageFlowRDD: RDD[(flag, 1)]
// 3. Group the actions by session id so each session's visit path can be rebuilt on its own
val sessionId2ActionsRDD: RDD[(String, Iterable[UserVisitAction])] = sessionId2ActionRDD.groupByKey()
val filterPageFlowAndOneRDD = sessionId2ActionsRDD.flatMap {
  case (_, sessionActions) =>
    // 4. Order the session's actions chronologically by action_time
    val chronological: List[UserVisitAction] =
      sessionActions.toList.sortBy(visit => DateUtils.parseTime(visit.action_time).getTime)
    // 5. Keep only the page_id of every ordered action
    val visitedPages: List[Long] = chronological.map(_.page_id)
    // 6. Pair each visited page with the next one to obtain the session's page splits
    val sessionSplits: List[String] = visitedPages.zip(visitedPages.tail).map {
      case (fromPage, toPage) => s"${fromPage}_$toPage"
    }
    // 7. Discard every split that is not one of the target page splits (e.g. 3_5)
    // 8. Emit each remaining split as (pageSplit, 1L) so occurrences can be counted
    sessionSplits.collect {
      case split if targetPF.contains(split) => (split, 1L)
    }
}
5.4.4 计算每个页面切片的总个数
// 9. Count how many times each page split occurred; countByKey returns the totals as a local Map keyed by split
val pageFlow2CountMap: collection.Map[String, Long] = filterPageFlowAndOneRDD.countByKey()
5.4.5 获取起始页面总个数
// 10. The start page is the first page id of the target page flow
val startPage: Long = targetPageFlow(0).toLong
// 11. Count every action, across all sessions, whose page_id is the start page
val startPageCount: Long =
  sessionId2ActionRDD.filter(pair => pair._2.page_id == startPage).count()
5.4.6 根据所有的切片个数信息,计算实际的页面切片转化率大小
// 12. Derive the conversion rate of every target page split from the start-page count and the per-split counts
val pageConvertString = getPageConvert(startPageCount, pageFlow2CountMap, targetPF)
/**
 * Builds the single-hop conversion-rate report for the target page splits.
 *
 * The rate of the first split is its count divided by `startPageCount`; the rate of
 * every later split is its count divided by the count of the split that precedes it
 * in `targetPF`. Entries are emitted in `targetPF` order as `split=rate` and joined
 * with `|`.
 *
 * A split that is missing from `pageFlow2CountMap` is treated as having a count of 0,
 * and a rate whose denominator is 0 is reported as 0.0 instead of NaN or Infinity.
 *
 * @param startPageCount    number of visits to the first page of the target flow
 * @param pageFlow2CountMap occurrence count per page split, e.g. "1_2" -> 5
 * @param targetPF          target page splits in flow order
 * @return the report string; empty when `targetPF` is empty
 */
def getPageConvert(startPageCount: Long
                   , pageFlow2CountMap: collection.Map[String, Long]
                   , targetPF: Array[String]): String = {
  // A target split that never occurred has no entry in the map; count it as 0
  // instead of failing on Option.get.
  val splitCounts: Array[Double] =
    targetPF.map(pageFlow => pageFlow2CountMap.getOrElse(pageFlow, 0L).toDouble)
  // Denominator of split i: the start-page count for i == 0, otherwise the count of
  // split i - 1. The extra trailing element is dropped by zip below.
  val denominators: Array[Double] = startPageCount.toDouble +: splitCounts

  // Zipping arrays (rather than filling a hash map) keeps the report in targetPF order.
  targetPF.zip(splitCounts).zip(denominators).map {
    case ((pageFlow, currentCount), previousCount) =>
      val ratio: Double = if (previousCount == 0.0) 0.0 else currentCount / previousCount
      pageFlow + "=" + ratio
  }.mkString("|")
}
5.4.7 封装case class
// 13. Wrap the task id and the conversion-rate string in the PageSplitConvertRate case class
val pageSplitConvertRate: PageSplitConvertRate =
  PageSplitConvertRate(taskId, pageConvertString)
// Lift the single record into an RDD so it can be converted to a DataFrame
val pageSplitConvertRateRDD: RDD[PageSplitConvertRate] = {
  val records = Array(pageSplitConvertRate)
  sparkSession.sparkContext.makeRDD(records)
}
5.4.8 写入数据库
// 14. Append the result row to the page_convert_ratio table over JDBC,
//     using the connection settings held by ConfigurationManager
import sparkSession.implicits._
pageSplitConvertRateRDD.toDF().write
  .format("jdbc")
  .option("url", ConfigurationManager.config.getString(Constants.JDBC_URL))
  .option("user", ConfigurationManager.config.getString(Constants.JDBC_USER))
  .option("password", ConfigurationManager.config.getString(Constants.JDBC_PASSWORD))
  .option("dbtable", "page_convert_ratio")
  .mode(SaveMode.Append)
  .save()
// pageConvertString is a String, so calling foreach on it walks its characters and
// prints each one on its own line; print the whole report once instead.
println(pageConvertString)
小结
来源:CSDN
作者:YanLzh_MAlone
链接:https://blog.csdn.net/qq_35199832/article/details/103689949