工作需要做查询并导出数据的接口,考虑了SpringBoot + Spark 和 Impala,Spark只测试了本地模式,时间有限暂时没有测试yarn模式,但是Spark更适合做数据分析,查询Impala 是比较高效的,选择了以 Impala 做查询引擎。这里整合下 Impala 做个记录。不过因为项目有其他处理模块,所以只把Impala部分拆分出来,是完整的独立模块。
目录
- @SpringBootApplication
- @Controller
- @Service
- Impala查询模块
- @configuration
- Result请求结果类
- Constants常量类
- con.properties
- application.properties
- pom.xml
层级关系
@SpringBootApplication
package com.sm
import org.springframework.boot.SpringApplication
import org.springframework.boot.autoconfigure.{EnableAutoConfiguration, SpringBootApplication}
import org.springframework.context.annotation.ComponentScan
/**
* SpringBoot 入口
*
* create by LiuJinHe 2019/9/23
*/
@EnableAutoConfiguration
@ComponentScan
@SpringBootApplication
class CrowdPackageApp
object CrowdPackageApp extends App {
SpringApplication.run(classOf[CrowdPackageApp])
}
@Controller
package com.sm.controller
import com.sm.service.CrowdService
import com.sm.service.impl.CrowdServiceImpl
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.web.bind.annotation.{CrossOrigin, RequestMapping, RequestMethod, RequestParam, ResponseBody, RestController}
/**
* 请求映射地址
*
* create by LiuJinHe 2019/9/23
*/
@RestController
@CrossOrigin
class CrowdController @Autowired()(crowdService:CrowdService){
/**
* 人群包导出请求
*/
@RequestMapping(value = Array("/crowd"), method = Array(RequestMethod.GET))
@ResponseBody
def exportCrowd(@RequestParam("channel_id") channelId: Int, @RequestParam("cp_game_ids") cpGameIds: String,
@RequestParam("action_type") actionType: Int, @RequestParam("os") os: Int,
@RequestParam("begin_time") beginTime: String, @RequestParam("end_time") endTime: String): String = {
crowdService.reqCrowd(channelId, cpGameIds, actionType, os, beginTime, endTime)
}
}
@Service
首先是 Trait,方便以后业务扩展。
package com.sm.service
/**
* 业务抽象类
*
* create by LiuJinHe 2019/9/24
*/
trait CrowdService {
def reqCrowd(channelId: Int, cpGameIds: String, actionType: Int, os: Int, beginTime: String, endTime: String):String
}
实现类
有其他业务代码,这里只拆分了Impala的。
package com.sm.service.impl
import scala.io.Source
/**
* 调用查询命令的实现类
*
* create by LiuJinHe 2019/9/24
*/
@Service("crowdService")
class CrowdServiceImpl extends CrowdService{
private val logger = LoggerFactory.getLogger(classOf[CrowdServiceImpl])
var statSql:String = _
var sqlStr: String = _
var result = new Result[String]
var dataType: String = _
override def reqCrowd(channelId: Int, cpGameIds: String, actionType: Int, os: Int, beginTime: String, endTime: String): String = {
var queryInfo = ""
// 校验参数
result = ParamsUtils.checkParams(channelId, cpGameIds, actionType, os, beginTime, endTime)
if (result.getStatusCode != Result.FAILED) {
// 判断查询类型
if (actionType.toInt == 1) {
// 激活模块
dataType = "IMEI"
statSql = "refresh cp_data.tb_sdk_active_log"
activeAnalysis(cpGameIds, os, beginTime, endTime)
} else if (actionType.toInt == 2) {
// 付费模块
dataType = "IDFA"
statSql = "refresh cp_data.tb_sdk_user_payment"
paymentAnalysis(cpGameIds, os, beginTime, endTime)
} else logger.info("错误参数,参数必须为active or payment!")
// 执行SQL 导出结果
logger.debug("执行SQL:" + sqlStr)
queryInfo = ImpalaConnect.queryWithImpala(statSql, sqlStr)
result.setMsg(queryInfo)
logger.info("查询并导出结果成功")
}
// 执行结果
result.getMsg
}
/**
* 付费,解析参数为 SQL 语句
*/
def paymentAnalysis(cpGameIds: String, os: Int, beginTime: String, endTime: String): String = {
logger.info("构建付费 SQL 命令")
sqlStr =
s"""
|select distinct tab.device_id from cp_data.tb_sdk_user_payment tab,cp_data.tb_base_game_conf conf where tab.game_id = conf.game_id
| and tab.`date` between '$beginTime' and '$endTime'
| and conf.cp_game_id in ($cpGameIds)
""".stripMargin
if (os.equals(1)) {
sqlStr += s"and right(cast(tab.package_id as string),2) != '99';"
} else if (os.equals(2)) {
sqlStr += s"and right(cast(tab.package_id as string),2) = '99';"
} else result = new Result[String](Result.FAILED, "构建sql时发现os类型错误")
sqlStr
}
/**
* 激活,解析参数为 SQL 语句
*/
def activeAnalysis(cpGameIds: String, os: Int, beginTime: String, endTime: String): String = {
logger.info("构建激活 SQL 命令")
sqlStr =
s"""
|select distinct tab.device_id from cp_data.tb_sdk_active_log tab,cp_data.tb_base_game_conf conf where tab.game_id = conf.game_id
| and tab.`date` between '$beginTime' and '$endTime'
| and tab.os = '$os'
| and conf.cp_game_id in ($cpGameIds)
""".stripMargin
sqlStr
}
}
Impala查询模块
package com.sm.execute
import java.io.{File, FileWriter}
import java.sql.{Connection, DriverManager, PreparedStatement, ResultSet, SQLException}
import com.sm.constants.Constants
import org.slf4j.{Logger, LoggerFactory}
/**
* Scala 连接 Impala
*
* create by LiuJinHe 2019/9/24
*/
object ImpalaConnect {
private val logger: Logger = LoggerFactory.getLogger(ImpalaConnect.getClass)
def queryWithImpala(statSql: String, sqlStr: String): String = {
val date = System.currentTimeMillis()
var conn: Connection = null
var prepareStat: PreparedStatement = null
var result: ResultSet = null
// 生成文件
val file = new File(Constants.IMPALA_OUT_PATH)
val fileWriter = new FileWriter(file)
// SQL
println("Impala开始执行sql查询")
try {
// 连接 IMPALA 查询
Class.forName(Constants.IMPALA_JDBC)
conn = DriverManager.getConnection(Constants.IMPALA_URL, Constants.IMPALA_USER, Constants.IMPALA_PASSWORD)
prepareStat = conn.prepareStatement(sqlStr)
val query= prepareStat.executeQuery
println("刷新表成功")
prepareStat = conn.prepareStatement(sqlStr)
result = prepareStat.executeQuery
// 写入文件
var count = 0
while (result.next) {
val str = result.getString(1)
fileWriter.write( str + "\n")
fileWriter.flush()
count += 1
}
println("导出结果成功,总条数: " + count)
} catch {
case e: Exception =>
logger.info("jdbc查询impala失败", e)
} finally try {
disConnect(conn, result, prepareStat)
fileWriter.close()
}
"查询成功!"
}
def disConnect(connection: Connection, rs: ResultSet, ps: PreparedStatement): Unit = {
try {
if (rs != null) rs.close()
if (ps != null) ps.close()
if (connection != null) {
connection.close()
}
} catch {
case e: SQLException =>
logger.info("jdbc连接关闭失败", e)
}
}
}
@configuration
package com.sm.config
import org.springframework.context.annotation.Configuration
import org.springframework.web.servlet.config.annotation.{CorsRegistry, WebMvcConfigurer}
/**
* create by LiuJinHe 2019/9/24
*/
@Configuration
class CrowdConfig extends WebMvcConfigurer {
override def addCorsMappings(registry: CorsRegistry): Unit = {
registry.addMapping("/**")
.allowedOrigins("*")
.allowCredentials(true)
.allowedMethods("GET", "POST", "PUT", "DELETE", "OPTIONS")
.maxAge(3600)
}
}
Result请求结果类
package com.sm.common
import java.io.Serializable
import scala.beans.BeanProperty
/**
* 请求结果
*
* create by LiuJinHe 2019/9/23
*/
@SerialVersionUID(1L)
object Result {
val SUCCESS = 1
val FAILED = 0
val DEFAULT_SUCCESS_MESSAGE = "success"
val DEFAULT_FAILED_MESSAGE = "failed"
def SuccessResult[T] = new Result[T](SUCCESS, DEFAULT_SUCCESS_MESSAGE)
def SuccessResult[T](msg: String) = new Result[T](SUCCESS, msg)
def SuccessResult[T](data: T) = new Result[T](SUCCESS, DEFAULT_SUCCESS_MESSAGE, data)
def SuccessResult[T](msg: String, data: T) = new Result[T](SUCCESS, msg, data)
def FailedResult[T] = new Result[T](FAILED, DEFAULT_FAILED_MESSAGE)
def FailedResult[T](msg: String) = new Result[T](FAILED, msg)
def FailedResult[T](data: T) = new Result[T](FAILED, DEFAULT_FAILED_MESSAGE, data)
def FailedResult[T](msg: String, data: T) = new Result[T](FAILED, msg, data)
}
@SerialVersionUID(1L)
class Result[T] extends Serializable {
@BeanProperty var statusCode: Int = 0
@BeanProperty var msg: String = _
@BeanProperty var data: T = _
this.statusCode = Result.SUCCESS
this.msg = Result.DEFAULT_SUCCESS_MESSAGE
def this(statusCode: Int, msg: String) {
this()
this.statusCode = statusCode
this.msg = msg
}
def this(statusCode: Int, msg: String, data: T) {
this()
this.statusCode = statusCode
this.msg = msg
this.data = data
}
}
Constants常量类
package com.sm.constants
import java.io.InputStream
import java.util.Properties
/**
* create by LiuJinHe 2019/9/24
*/
object Constants {
var IMPALA_JDBC: String = _
var IMPALA_URL: String = _
var IMPALA_USER: String = _
var IMPALA_PASSWORD: String = _
var IMPALA_OUT_PATH:String = _
var in: InputStream = _
try {
in = Constants.getClass.getClassLoader.getResourceAsStream("conf.properties")
val prop: Properties = new Properties()
prop.load(in)
IMPALA_JDBC = prop.getProperty("impala.jdbc")
IMPALA_URL = prop.getProperty("impala.url")
IMPALA_USER = prop.getProperty("impala.user")
IMPALA_PASSWORD = prop.getProperty("impala.password")
IMPALA_OUT_PATH = prop.getProperty("impala.out.path")
}
catch {
case e: Exception =>
e.printStackTrace()
} finally
in.close()
}
con.properties
常用配置其实可以放到application.properties/yml 中,启动SpringBoot时加载,不过这里还有一些其他配置,就直接放到一个配置文件里一起加载。
impala.jdbc = com.cloudera.impala.jdbc41.Driver
impala.url = jdbc:impala://xxx:21050
impala.user = hive
impala.password = hive
impala.out.path = E:\\testdata\\impala_out_result.txt
application.properties
这里就作为初始设置,按需求可以放入其他需要启动加载的配置,比如可以把Hadoop、Zookeeper、Redis、Kafka等配置添加启动加载。
server.port = 8081
server.max-http-header-size = 10000000
server.use-forward-headers = true
server.tomcat.remote-ip-header = X-Real-IP
server.tomcat.protocol-header = X-Forwarded-Proto
spring.servlet.multipart.maxFileSize = -1
spring.servlet.multipart.maxRequestSize = -1
yml类型配置
server:
port: 8081
max-http-header-size: 1000000
tomcat:
protocol-header: X-Forwarded-Proto
remote-ip-header: X-Real-IP
spring:
application:
name: crowd-package-server
servlet:
multipart:
enabled: true
max-request-size: -1
max-file-size: -1
pom.xml
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<slf4j.version>1.7.28</slf4j.version>
<log4j.version>2.12.1</log4j.version>
<scala.version>2.11.12</scala.version>
</properties>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.1.7.RELEASE</version>
<relativePath/>
</parent>
<dependencies>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-nop</artifactId>
<version>${slf4j.version}</version>
</dependency>
<!-- scala -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<!-- impala jdbc-->
<dependency>
<groupId>com.clodera.impala</groupId>
<artifactId>impalajdbc41</artifactId>
<version>2.6.15</version>
</dependency>
<!-- Gson-->
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.8.5</version>
</dependency>
<!-- sringboot -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-aop</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-configuration-processor</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.47</version>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>2.6.1</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.13</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.2.2</version>
<executions>
<execution>
<id>scala-compile-first</id>
<phase>process-resources</phase>
<goals>
<goal>add-source</goal>
<goal>compile</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<fork>true</fork>
</configuration>
</plugin>
</plugins>
</build>
</project>
打包上传
java -jar crowd-package-server-1.0.jar
PostMan测试:
来源:CSDN
作者:南宫紫攸
链接:https://blog.csdn.net/weixin_45353054/article/details/103958430