sqoop commands:
sqoop list database command
==================
sqoop list-databases \
--connect "jdbc:mysql://quickstart.cloudera:3306" \
--username retail_dba \
--password cloudera
sqoop list tables command
=========================
sqoop list-tables \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username retail_dba \
--password cloudera
sqoop eval command
=========================
sqoop eval \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username retail_dba \
--password cloudera \
--query "select count(1) from order_items"
sqoop import all tables commands
================================
sqoop import-all-tables \
-m 12 \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username=retail_dba \
--password=cloudera \
--warehouse-dir=/user/cloudera/sqoop_import_all
sqoop import individual tables commands
=======================================
--Default
sqoop import \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username=retail_dba \
--password=cloudera \
--table departments \
--as-textfile \
--target-dir=/user/cloudera/departments
note : use below options for various file formats
--as-avrodatafile \
--as-sequencefile \
--as-textfile \
--as-parquetfile \
sqoop import delimitors
=======================
sqoop import \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username=retail_dba \
--password=cloudera \
--table departments \
--as-textfile \
--target-dir=/user/cloudera/departments \
--fields-terminated-by '|' \
--lines-terminated-by '\n' \
sqoop boundary Query and columns
================================
sqoop import \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username=retail_dba \
--password=cloudera \
--table departments \
--target-dir /user/cloudera/departments \
--boundary-query "select 2, 8 from departments" \
--columns department_id,department_name
sqoop where clause
==================
sqoop import \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username=retail_dba \
--password=cloudera \
--table departments \
--target-dir /user/cloudera/departments \
--where "department_id > 4" \
--columns department_id,department_name
sqoop join query and split by
=============================
sqoop import \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username=retail_dba \
--password=cloudera \
--query="select * from orders join order_items on orders.order_id = order_items.order_item_order_id where \$CONDITIONS" \
--target-dir /user/cloudera/order_join \
--split-by order_id
note : sqoop look for the primary key to determine no of mappers. Hence primary key or split by key must be specified.
sqoop incremental load
======================
sqoop import \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username=retail_dba \
--password=cloudera \
--table departments \
--target-dir /user/cloudera/departments \
--append \
--fields-terminated-by '|' \
--lines-terminated-by '\n' \
--check-column "department_id" \
--incremental append \
--last-value 7 \
note : 2 modes are present for incremental load
1. append
2. last modified
HIVE Related sqoop commands
===========================
Loading avro formatted dataset to hive.
1. sqoop command to generate avro dataset from mysql
sqoop import \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username=retail_dba \
--password=cloudera \
--table departments \
--as-avrodatafile \
--target-dir=/user/cloudera/departments
note : an Avro schema file will be generated in the path where sqoop import runs.
2. copy the schema generated to hadoop
fs -put departments.avsc /user/cloudera
3. Create Hive table specifying data set location and avro serde
CREATE EXTERNAL TABLE departments
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
LOCATION 'hdfs://quickstart.cloudera/user/cloudera/departments'
TBLPROPERTIES ('avro.schema.url'='hdfs://quickstart.cloudera/user/cloudera/departments.avsc');
sqoop list database command
==================
sqoop list-databases \
--connect "jdbc:mysql://quickstart.cloudera:3306" \
--username retail_dba \
--password cloudera
sqoop list tables command
=========================
sqoop list-tables \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username retail_dba \
--password cloudera
sqoop eval command
=========================
sqoop eval \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username retail_dba \
--password cloudera \
--query "select count(1) from order_items"
sqoop import all tables commands
================================
sqoop import-all-tables \
-m 12 \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username=retail_dba \
--password=cloudera \
--warehouse-dir=/user/cloudera/sqoop_import_all
sqoop import individual tables commands
=======================================
--Default
sqoop import \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username=retail_dba \
--password=cloudera \
--table departments \
--as-textfile \
--target-dir=/user/cloudera/departments
note : use below options for various file formats
--as-avrodatafile \
--as-sequencefile \
--as-textfile \
--as-parquetfile \
sqoop import delimitors
=======================
sqoop import \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username=retail_dba \
--password=cloudera \
--table departments \
--as-textfile \
--target-dir=/user/cloudera/departments \
--fields-terminated-by '|' \
--lines-terminated-by '\n' \
sqoop boundary Query and columns
================================
sqoop import \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username=retail_dba \
--password=cloudera \
--table departments \
--target-dir /user/cloudera/departments \
--boundary-query "select 2, 8 from departments" \
--columns department_id,department_name
sqoop where clause
==================
sqoop import \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username=retail_dba \
--password=cloudera \
--table departments \
--target-dir /user/cloudera/departments \
--where "department_id > 4" \
--columns department_id,department_name
sqoop join query and split by
=============================
sqoop import \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username=retail_dba \
--password=cloudera \
--query="select * from orders join order_items on orders.order_id = order_items.order_item_order_id where \$CONDITIONS" \
--target-dir /user/cloudera/order_join \
--split-by order_id
note : sqoop look for the primary key to determine no of mappers. Hence primary key or split by key must be specified.
sqoop incremental load
======================
sqoop import \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username=retail_dba \
--password=cloudera \
--table departments \
--target-dir /user/cloudera/departments \
--append \
--fields-terminated-by '|' \
--lines-terminated-by '\n' \
--check-column "department_id" \
--incremental append \
--last-value 7 \
note : 2 modes are present for incremental load
1. append
2. last modified
HIVE Related sqoop commands
===========================
Loading avro formatted dataset to hive.
1. sqoop command to generate avro dataset from mysql
sqoop import \
--connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
--username=retail_dba \
--password=cloudera \
--table departments \
--as-avrodatafile \
--target-dir=/user/cloudera/departments
note : an Avro schema file will be generated in the path where sqoop import runs.
2. copy the schema generated to hadoop
fs -put departments.avsc /user/cloudera
3. Create Hive table specifying data set location and avro serde
CREATE EXTERNAL TABLE departments
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
LOCATION 'hdfs://quickstart.cloudera/user/cloudera/departments'
TBLPROPERTIES ('avro.schema.url'='hdfs://quickstart.cloudera/user/cloudera/departments.avsc');
No comments:
Post a Comment