访客visit分析
========================================================================================================================================================================================================================================================================
-- 回头/单次访客统计
-- Peek at the first 10 rows of the visit-level click-stream table to see its columns.
select *
from ods_click_stream_visit
limit 10;
+---------------------------------------+-------------------------------------+--------------------------------+---------------------------------+--------------------------------+---------------------------------+----------------------------------------------------+------------------------------------+---------------------------------+--+
| ods_click_stream_visit.session | ods_click_stream_visit.remote_addr | ods_click_stream_visit.intime | ods_click_stream_visit.outtime | ods_click_stream_visit.inpage | ods_click_stream_visit.outpage | ods_click_stream_visit.referal | ods_click_stream_visit.pagevisits | ods_click_stream_visit.datestr |
+---------------------------------------+-------------------------------------+--------------------------------+---------------------------------+--------------------------------+---------------------------------+----------------------------------------------------+------------------------------------+---------------------------------+--+
| 005fd135-d9ec-4ac2-ba16-706004359d42 | 195.110.138.105 | 2018-11-01 19:10:52 | 2018-11-01 19:10:52 | /cassandra-clustor/ | /cassandra-clustor/ | "https://www.google.com.tw/" | 1 | 20181101 |
| 00a9e4d8-dfb2-4d32-b180-eb2383112317 | 66.249.66.84 | 2018-11-01 07:44:41 | 2018-11-01 07:44:41 | /finance-rhive-repurchase/ | /finance-rhive-repurchase/ | "-" | 1 | 20181101 |
| 0173589c-2fbb-4d60-ad65-44d7468ec0ab | 159.226.251.170 | 2018-11-01 08:26:18 | 2018-11-01 08:26:18 | /hadoop-mahout-roadmap/ | /hadoop-mahout-roadmap/ | "http://f.dataguru.cn/thread-177375-1-1.html" | 1 | 20181101 |
| 065d5e7a-d7fe-45a3-b7b4-a5f053ecc4ab | 221.179.193.202 | 2018-11-02 00:44:07 | 2018-11-02 00:44:07 | /hadoop-mahout-roadmap/ | /hadoop-mahout-roadmap/ | "-" | 1 | 20181101 |
| 14c9ea27-dd75-476e-bea4-7f01d9ee4953 | 218.18.232.228 | 2018-11-01 09:53:55 | 2018-11-01 09:53:55 | /hadoop-family-roadmap/ | /hadoop-family-roadmap/ | "https://www.google.com.hk/" | 1 | 20181101 |
| 165938d8-2c27-4c57-b1e7-613a28d2f1d6 | 113.90.48.158 | 2018-11-01 23:19:41 | 2018-11-01 23:19:41 | /finance-rhive-repurchase/ | /finance-rhive-repurchase/ | "http://blog.fens.me/series-it-finance/" | 1 | 20181101 |
| 1784f12d-0b49-4c0e-b7e4-93dc39883091 | 123.116.73.157 | 2018-11-02 00:58:58 | 2018-11-02 00:58:58 | /hadoop-zookeeper-intro/ | /hadoop-zookeeper-intro/ | "https://www.google.com.hk/" | 1 | 20181101 |
| 182549fa-aca2-49ea-88ce-6aa238bbf1dd | 110.211.10.14 | 2018-11-01 13:31:10 | 2018-11-01 13:31:10 | /hadoop-mahout-roadmap/ | /hadoop-mahout-roadmap/ | "http://f.dataguru.cn/forum.php?mod=viewthread&tid=175501" | 1 | 20181101 |
| 1ba35571-d9c0-4b26-bcd0-4999e395ae7f | 183.60.9.217 | 2018-11-01 07:10:57 | 2018-11-01 07:10:57 | /hadoop-hive-intro/ | /hadoop-hive-intro/ | "-" | 1 | 20181101 |
| 1d5dec98-637b-45f1-9ada-58ae4fff7a9b | 112.65.193.16 | 2018-11-01 08:48:31 | 2018-11-01 08:48:31 | /hadoop-mahout-roadmap/ | /hadoop-mahout-roadmap/ | "-" | 1 | 20181101 |
+---------------------------------------+-------------------------------------+--------------------------------+---------------------------------+--------------------------------+---------------------------------+----------------------------------------------------+------------------------------------+---------------------------------+--+
-- (Re)create dw_user_returning, which holds one row per remote_addr together
-- with its session count (acc_cnt) and a day label, partitioned by datestr.
-- `if exists` keeps the script re-runnable even when the table has not been
-- created yet, regardless of the hive.exec.drop.ignorenonexistent setting.
-- NOTE(review): acc_cnt is declared as string although it is loaded from a
-- count(...) result; the type is left unchanged here so the schema stays
-- compatible with existing readers.
drop table if exists dw_user_returning;
create table dw_user_returning(
day string,
remote_addr string,
acc_cnt string)
partitioned by (datestr string);
-- Show the columns and partition information of the newly created table.
describe dw_user_returning;
+--------------------------+-----------------------+-----------------------+--+
| col_name | data_type | comment |
+--------------------------+-----------------------+-----------------------+--+
| day | string | |
| remote_addr | string | |
| acc_cnt | string | |
| datestr | string | |
| | NULL | NULL |
| # Partition Information | NULL | NULL |
| # col_name | data_type | comment |
| | NULL | NULL |
| datestr | string | |
+--------------------------+-----------------------+-----------------------+--+
-- Preview the returning visitors for 20181101: every remote_addr that has more
-- than one session among the rows whose datestr is '20181101'.
-- The datestr filter keeps sessions from other loaded days from being counted
-- under the hard-coded day label; `having` filters the aggregated groups
-- directly, so no wrapper subquery is needed.
select
    '20181101' as day,
    remote_addr,
    count(session) as acc_cnt
from ods_click_stream_visit
where datestr='20181101'
group by remote_addr
having count(session)>1;
+-----------+-----------------+----------+--+
| day | remote_addr | acc_cnt |
+-----------+-----------------+----------+--+
| 20181101 | 220.181.89.174 | 3 |
| 20181101 | 66.249.66.84 | 3 |
+-----------+-----------------+----------+--+
-- Overwrite the datestr='20181101' partition of dw_user_returning with every
-- remote_addr that has more than one session among the source rows whose
-- datestr is '20181101'.
-- The source is restricted to the same datestr as the target partition so that
-- sessions from other loaded days are not written into this partition.
-- `having` filters the aggregated groups directly, replacing the wrapper subquery.
insert overwrite table dw_user_returning partition(datestr='20181101')
select
    '20181101' as day,
    remote_addr,
    count(session) as acc_cnt
from ods_click_stream_visit
where datestr='20181101'
group by remote_addr
having count(session)>1;
------------------------------------------------------------------------------------
-- Spot-check the loaded result: show up to 5 rows of dw_user_returning.
select *
from dw_user_returning
limit 5;
+------------------------+--------------------------------+----------------------------+----------------------------+--+
| dw_user_returning.day | dw_user_returning.remote_addr | dw_user_returning.acc_cnt | dw_user_returning.datestr |
+------------------------+--------------------------------+----------------------------+----------------------------+--+
| 20181101 | 220.181.89.174 | 3 | 20181101 |
| 20181101 | 66.249.66.84 | 3 | 20181101 |
+------------------------+--------------------------------+----------------------------+----------------------------+--+
来源:https://blog.csdn.net/longyanchen/article/details/98885001