目标
- 行转列
- 列转行
初始化
初始化数据
[root@kf30-hadoop-1 ~]# cat /home/hdfs/data/ad_list.txt
ad_101 http://www.google.com catalog8|catalog1
ad_102 http://www.sohu.com catalog6|catalog3
ad_103 http://www.baidu.com catalog7
ad_104 http://www.qq.com catalog5|catalog1|catalog4|catalog9
ad_105 http://sina.com创建表
create table ad_list(
ad_id string,
url string,
catalogs string
)
row format delimited
fields terminated by '\t'加载数据
load data local inpath '/home/hdfs/data/ad_list.txt' into table ad_list_1;
行转列
- 函数
concat_ws(',',array(string))
字符串拼接collect_set(catalog)
set集合(去重)collect_list(catalog)
list集合(不去重)select ad_id,concat_ws(',',collect_set(catalog)) as catalogs from col_to_row group by ad_id;
row_to_col.ad_id row_to_col.catalogs
ad_101 catalog8,catalog1
ad_102 catalog6,catalog3
ad_103 catalog7
ad_104 catalog5,catalog1,catalog4,catalog9
ad_105
列转行
- 函数
lateral view outer explode()
split(catalogs,'\\|')
select ad_id,catalog from ad_list_1 lateral view outer explode(split(catalogs,'\\|')) t as catalog;
col_to_row.ad_id col_to_row.catalog
ad_101 catalog8
ad_101 catalog1
ad_102 catalog6
ad_102 catalog3
ad_103 catalog7
ad_104 catalog5
ad_104 catalog1
ad_104 catalog4
ad_104 catalog9
ad_105 NULL