案例介绍
1 | - 商品表 |
数据准备
mysql中的基础数据
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134CREATE DATABASE hiveProject;
-- 地域表
CREATE TABLE `city_info` (
`city_id` int(11) DEFAULT NULL,
`city_name` varchar(255) DEFAULT NULL,
`area` varchar(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
insert into `city_info`(`city_id`,`city_name`,`area`)
values
(1,'BEIJING','NC'),
(2,'SHANGHAI','EC'),
(3,'NANJING','EC'),
(4,'GUANGZHOU','SC'),
(5,'SANYA','SC'),
(6,'WUHAN','CC'),
(7,'CHANGSHA','CC'),
(8,'XIAN','NW'),
(9,'CHENGDU','SW'),
(10,'HAERBIN','NE');
-- 商品表
DROP TABLE IF EXISTS `product_info`;
CREATE TABLE `product_info` (
`product_id` int(11) NOT NULL AUTO_INCREMENT,
`product_name` varchar(255) DEFAULT NULL,
`extend_info` varchar(255) DEFAULT NULL,
PRIMARY KEY (`product_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 ;
insert into product_info(product_id,product_name,extend_info) values (1,'product1','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (2,'product2','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (3,'product3','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (4,'product4','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (5,'product5','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (6,'product6','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (7,'product7','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (8,'product8','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (9,'product9','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (10,'product10','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (11,'product11','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (12,'product12','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (13,'product13','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (14,'product14','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (15,'product15','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (16,'product16','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (17,'product17','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (18,'product18','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (19,'product19','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (20,'product20','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (21,'product21','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (22,'product22','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (23,'product23','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (24,'product24','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (25,'product25','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (26,'product26','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (27,'product27','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (28,'product28','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (29,'product29','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (30,'product30','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (31,'product31','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (32,'product32','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (33,'product33','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (34,'product34','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (35,'product35','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (36,'product36','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (37,'product37','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (38,'product38','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (39,'product39','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (40,'product40','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (41,'product41','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (42,'product42','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (43,'product43','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (44,'product44','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (45,'product45','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (46,'product46','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (47,'product47','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (48,'product48','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (49,'product49','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (50,'product50','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (51,'product51','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (52,'product52','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (53,'product53','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (54,'product54','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (55,'product55','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (56,'product56','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (57,'product57','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (58,'product58','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (59,'product59','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (60,'product60','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (61,'product61','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (62,'product62','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (63,'product63','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (64,'product64','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (65,'product65','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (66,'product66','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (67,'product67','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (68,'product68','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (69,'product69','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (70,'product70','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (71,'product71','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (72,'product72','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (73,'product73','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (74,'product74','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (75,'product75','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (76,'product76','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (77,'product77','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (78,'product78','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (79,'product79','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (80,'product80','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (81,'product81','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (82,'product82','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (83,'product83','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (84,'product84','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (85,'product85','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (86,'product86','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (87,'product87','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (88,'product88','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (89,'product89','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (90,'product90','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (91,'product91','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (92,'product92','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (93,'product93','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (94,'product94','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (95,'product95','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (96,'product96','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (97,'product97','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (98,'product98','{"product_status":1}');
insert into product_info(product_id,product_name,extend_info) values (99,'product99','{"product_status":0}');
insert into product_info(product_id,product_name,extend_info) values (100,'product100','{"product_status":1}');hive的日志数据
1
2
3
4
5
6
7
8
9
10
11
1295,2bf501a7637549c89cf55342331b15db,2016-05-05 21:01:56,1,72
95,2bf501a7637549c89cf55342331b15db,2016-05-05 21:52:26,1,68
95,2bf501a7637549c89cf55342331b15db,2016-05-05 21:17:03,1,40
95,2bf501a7637549c89cf55342331b15db,2016-05-05 21:32:07,1,21
95,2bf501a7637549c89cf55342331b15db,2016-05-05 21:26:06,1,63
95,2bf501a7637549c89cf55342331b15db,2016-05-05 21:03:11,1,60
95,2bf501a7637549c89cf55342331b15db,2016-05-05 21:43:43,1,30
···这里只放了一部分,详细的日志文件到网盘下载
链接:https://pan.baidu.com/s/1hIfK7OwTbfyDEvjaIrhq2A
提取码:gsoe
复制这段内容后打开百度网盘手机App,操作更方便哦
使用sqoop导入mysql数据到hive中
在hive中建两张我们要导入的表
1
2
3
4
5
6
7
8
9
10
11
12
13use myhive;
create table city_info(
city_id int,
city_name string,
area string
)row format delimited fields terminated by '\t';
create table product_info(
product_id int,
product_name string,
extend_info string
)row format delimited fields terminated by '\t';使用sqoop导入数据
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29- 导入city_info
sqoop import \
--connect jdbc:mysql://localhost:3306/hiveProject \
--username root --password 123456 \
--delete-target-dir \
--table city_info \
--hive-import \
--hive-database myhive \
--hive-table city_info \
--hive-overwrite \
--fields-terminated-by '\t' \
--lines-terminated-by '\n' \
--split-by city_id \
-m 2
- 导入product_info
sqoop import \
--connect jdbc:mysql://localhost:3306/hiveProject \
--username root --password 123456 \
--delete-target-dir \
--table product_info \
--hive-import \
--hive-database myhive \
--hive-table product_info \
--hive-overwrite \
--fields-terminated-by '\t' \
--lines-terminated-by '\n' \
--split-by product_id \
-m 2
创建user_click表,并加载日志数据
建表
1
2
3
4
5
6
7
8create table user_click(
user_id int,
session_id string,
action_time string,
city_id int,
product_id int
) partitioned by (day string)
row format delimited fields terminated by ',';加载数据
1
load data local inpath '/home/hadoop/data/topn/user_click.txt' overwrite into table user_click partition(day='2016-05-05');
实际操作
思路分析
1
2
3
4
5这里我们分三步走
1、 通过user_click和city_info表获得所有点击的地域情况
2、 根据上一步的数据,统计各区域下各商品的点击次数
3、 通过上一步的数据和product_info表获得商品情况
4、 对上一步的数据做统计处理,获得各区域top3的product-click数据1、 通过user_click和city_info表获得所有点击的地域情况
1
2
3
4
5
6
7
8create table tmp_product_click_basic_info
as
select u.product_id, u.city_id, c.city_name, c.area
from
(select product_id, city_id from user_click where day='2016-05-05' ) u
join
(select city_id, city_name,area from city_info) c
on u.city_id = c.city_id;2、根据上一步的数据,统计各区域下各商品的点击次数
1
2
3
4
5
6
7
8create table tmp_area_product_click_count
as
select
area, product_id, count(1) click_count
from
tmp_product_click_basic_info
group by
product_id, area;3、 通过上一步的数据和product_info表获得商品情况
1
2
3
4
5
6
7create table tmp_area_product_click_count_full_info
as
select
a.product_id, b.product_name, a.area, a.click_count
from
tmp_area_product_click_count a join product_info b
on a.product_id = b.product_id;4、 对数据做统计处理,获得各区域top3的product-click数据
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30create table area_product_click_count_top3
select *
from (
select
product_id, product_name,area, click_count,
row_number() over(partition by area order by click_count desc) rank
from
tmp_area_product_click_count_full_info
) t where t.rank <=3;
t.product_id t.product_name t.area t.click_count t.rank
7 product7 CC 39 1
26 product26 CC 39 2
70 product70 CC 38 3
4 product4 EC 40 1
96 product96 EC 32 2
5 product5 EC 31 3
9 product9 NC 16 1
40 product40 NC 16 2
5 product5 NC 13 3
56 product56 NW 20 1
67 product67 NW 20 2
48 product48 NW 19 3
38 product38 SC 35 1
98 product98 SC 34 2
33 product33 SC 34 3
16 product16 SW 20 1
95 product95 SW 19 2
60 product60 SW 19 3
细节补充
- 如何在shell上执行整个过程
hive -f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16vim hive.sql
use myhive;
select *
from (
select
product_id, product_name,area, click_count,
row_number() over(partition by area order by click_count desc) rank
from
tmp_area_product_click_count_full_info
) t where t.rank <=3;
通过hive -f hive.sql来执行SQL文件
然后把指令添加到定时任务,就可以每天自动的完成任务了