-- Step 2: add partitions and import data.
-- The external table is declared first. The ALTER TABLE ... ADD PARTITION
-- statement that follows it attaches the hourly log directories.
--
-- xxx_access_log: external table over raw access-log text files below /data/xxx.
-- RegexSerDe splits every input line with the pattern given in input.regex:
-- capture group N fills column N, so the pattern needs one group for each of
-- the 14 columns, in declaration order. A line that does not match the pattern
-- is returned with NULL in every column.
-- The input.regex value below is a placeholder (it reads: your own regex) and
-- must be replaced with the real pattern before the table is queried.
-- The column named time is quoted with backticks because TIME is a reserved
-- keyword in newer Hive releases. The column name itself is unchanged.
-- Rows are stored per (event_date, event_hour) partition, both kept as strings
-- such as 20151216 and 00.
CREATE EXTERNAL TABLE IF NOT EXISTS xxx_access_log (
    http_host           STRING,
    ip                  STRING,
    `time`              STRING,
    http_method         STRING,
    uri                 STRING,
    http_response_code  STRING,
    body_bytes_send     INT,
    referer             STRING,
    user_agent          STRING,
    x_forwarded_for     STRING,
    cookie              STRING,
    request_time        STRING,
    content_length      INT,
    request_body        STRING
)
COMMENT 'input xxx access log'
PARTITIONED BY (
    event_date STRING,
    event_hour STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    "input.regex" = "自己的正则"
)
STORED AS TEXTFILE
LOCATION "/data/xxx";
-- Attach one partition per hourly log directory of 2015-12-16 to the
-- xxx_access_log table (hours 00 through 23).
-- All PARTITION clauses belong to one ALTER TABLE statement: every partition
-- spec is wrapped in parentheses, the clauses are separated by whitespace only,
-- and the statement terminator appears once, after the last clause.
-- IF NOT EXISTS lets the statement be re-run without failing on partitions
-- that were already added.
ALTER TABLE xxx_access_log ADD IF NOT EXISTS
    PARTITION (event_date = '20151216', event_hour = '00') LOCATION "/data/xxx/20151216/00"
    PARTITION (event_date = '20151216', event_hour = '01') LOCATION "/data/xxx/20151216/01"
    PARTITION (event_date = '20151216', event_hour = '02') LOCATION "/data/xxx/20151216/02"
    PARTITION (event_date = '20151216', event_hour = '03') LOCATION "/data/xxx/20151216/03"
    PARTITION (event_date = '20151216', event_hour = '04') LOCATION "/data/xxx/20151216/04"
    PARTITION (event_date = '20151216', event_hour = '05') LOCATION "/data/xxx/20151216/05"
    PARTITION (event_date = '20151216', event_hour = '06') LOCATION "/data/xxx/20151216/06"
    PARTITION (event_date = '20151216', event_hour = '07') LOCATION "/data/xxx/20151216/07"
    PARTITION (event_date = '20151216', event_hour = '08') LOCATION "/data/xxx/20151216/08"
    PARTITION (event_date = '20151216', event_hour = '09') LOCATION "/data/xxx/20151216/09"
    PARTITION (event_date = '20151216', event_hour = '10') LOCATION "/data/xxx/20151216/10"
    PARTITION (event_date = '20151216', event_hour = '11') LOCATION "/data/xxx/20151216/11"
    PARTITION (event_date = '20151216', event_hour = '12') LOCATION "/data/xxx/20151216/12"
    PARTITION (event_date = '20151216', event_hour = '13') LOCATION "/data/xxx/20151216/13"
    PARTITION (event_date = '20151216', event_hour = '14') LOCATION "/data/xxx/20151216/14"
    PARTITION (event_date = '20151216', event_hour = '15') LOCATION "/data/xxx/20151216/15"
    PARTITION (event_date = '20151216', event_hour = '16') LOCATION "/data/xxx/20151216/16"
    PARTITION (event_date = '20151216', event_hour = '17') LOCATION "/data/xxx/20151216/17"
    PARTITION (event_date = '20151216', event_hour = '18') LOCATION "/data/xxx/20151216/18"
    PARTITION (event_date = '20151216', event_hour = '19') LOCATION "/data/xxx/20151216/19"
    PARTITION (event_date = '20151216', event_hour = '20') LOCATION "/data/xxx/20151216/20"
    PARTITION (event_date = '20151216', event_hour = '21') LOCATION "/data/xxx/20151216/21"
    PARTITION (event_date = '20151216', event_hour = '22') LOCATION "/data/xxx/20151216/22"
    PARTITION (event_date = '20151216', event_hour = '23') LOCATION "/data/xxx/20151216/23";
数据导入后,如果建表语句使用的是 hive-contrib 包中的 SerDe,即
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe' WITH SERDEPROPERTIES (...)
查询时会因为 Hive 找不到 hive-contrib-xxx.jar 包而报错。
解决办法: 在 hive/conf/hive-env.sh 中增加如下一行:
# Point HIVE_AUX_JARS_PATH at the hive-contrib jar so that Hive can find it.
# NOTE(review): the xxx parts look like placeholders for the real install path
# and jar version - substitute the actual values. Presumably Hive must be
# restarted for the change to take effect - confirm for your deployment.
export HIVE_AUX_JARS_PATH=/home/xxx/hive/lib/hive-contrib-xxx.jar