-- Create the database used for the compression experiment.
-- "if not exists" lets the script be re-run without failing on an existing database.
create database if not exists interview;
-- Switch the session to that database so the unqualified table names below resolve to it.
use interview;
-- Create the uncompressed page-view table.
-- Every column is declared as string and rows are parsed as tab-delimited fields.
-- "if not exists" lets the script be re-run without failing on an existing table.
create table if not exists page_views (
    track_time  string,
    url         string,
    session_id  string,
    referer     string,
    ip          string,
    end_user_id string,
    city_id     string
)
row format delimited
fields terminated by '\t'
;
-- Load the data file from the local filesystem; "overwrite" replaces whatever the table already holds.
load data local inpath '/home/hadoop/data/page_views.dat' overwrite into table page_views;
-- Preview a handful of rows to confirm the load worked.
select * from page_views limit 10;
查看表在hdfs上的大小(未压缩前)
-- Print the table's detailed metadata; the Location field gives its directory on HDFS.
describe formatted page_views;
位置在 hdfs://node002:9000/user/hive/warehouse/interview.db/page_views
# Shell command (not HiveQL): print the summarized, human-readable size of the table directory.
# The NameNode address is host:port, so the URI is node002:9000, and Hive's default
# warehouse directory is spelled "warehouse".
hadoop fs -du -s -h hdfs://node002:9000/user/hive/warehouse/interview.db/page_views;
可以查看数据大小为18.1M
-- Enable compression of the files Hive writes as query output for this session.
set hive.exec.compress.output=true;
-- Select bzip2 as the output codec. The Hadoop class is BZip2Codec; "BZipCodec" does not exist.
set mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.BZip2Codec;
-- Create a tab-delimited copy of page_views via CTAS.
-- The whole table is copied (no "limit"), so its on-disk size can be compared with the original table.
create table page_views_bzip2
row format delimited fields terminated by '\t'
as
select *
from page_views;
-- Print the new table's detailed metadata to find where its files are stored.
desc formatted page_views_bzip2;
查看大小为3.6M
来源:oschina
链接:https://my.oschina.net/u/4434424/blog/4286819