Impala/Hive to get list of tables along with its size
Hive
CLI
show table extended like '.*'
tableName:t100kowner:clouderalocation:file:/home/cloudera/local/t100kinputformat:org.apache.hadoop.mapred.TextInputFormatoutputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormatcolumns:struct columns { i32 i}partitioned:falsepartitionColumns:totalNumberFiles:1totalFileSize:588895maxFileSize:588895minFileSize:588895lastAccessTime:0lastUpdateTime:1492675975000tableName:t10kowner:clouderalocation:file:/home/cloudera/local/t10kinputformat:org.apache.hadoop.mapred.TextInputFormatoutputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormatcolumns:struct columns { i32 i}partitioned:falsepartitionColumns:totalNumberFiles:1totalFileSize:48894maxFileSize:48894minFileSize:48894lastAccessTime:0lastUpdateTime:1492675978000tableName:t1kowner:clouderalocation:file:/home/cloudera/local/t1kinputformat:org.apache.hadoop.mapred.TextInputFormatoutputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormatcolumns:struct columns { i32 i}partitioned:falsepartitionColumns:totalNumberFiles:1totalFileSize:3893maxFileSize:3893minFileSize:3893lastAccessTime:0lastUpdateTime:1492675983000tableName:t1mowner:clouderalocation:file:/home/cloudera/local/t1minputformat:org.apache.hadoop.mapred.TextInputFormatoutputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormatcolumns:struct columns { i32 i}partitioned:falsepartitionColumns:totalNumberFiles:1totalFileSize:6888896maxFileSize:6888896minFileSize:6888896lastAccessTime:0lastUpdateTime:1492675968000
Metastore (e.g. MySql)
select d.name as db_name ,t.tbl_name as tbl_name ,from_unixtime(min(t.create_time)) as create_time ,min(t.owner) as owner ,min(case when tp.param_key = 'COLUMN_STATS_ACCURATE' then tp.param_value end) as COLUMN_STATS_ACCURATE ,min(case when tp.param_key = 'last_modified_by' then tp.param_value end) as last_modified_by ,min(case when tp.param_key = 'last_modified_time' then from_unixtime(tp.param_value) end) as last_modified_time ,min(case when tp.param_key = 'numFiles' then tp.param_value end) as numFiles ,min(case when tp.param_key = 'numRows' then tp.param_value end) as numRows ,min(case when tp.param_key = 'rawDataSize' then tp.param_value end) as rawDataSize ,min(case when tp.param_key = 'totalSize' then tp.param_value end) as totalSize ,min(case when tp.param_key = 'transient_lastDdlTime' then from_unixtime(tp.param_value) end) as transient_lastDdlTime from metastore.DBS as d join metastore.TBLS as t on t.db_id = d.db_id join metastore.TABLE_PARAMS as tp on tp.tbl_id = t.tbl_id group by d.name ,t.tbl_name order by d.name ,t.tbl_name
+---------+----------+---------------------+----------+-----------------------+------------------+---------------------+----------+---------+-------------+-----------+-----------------------+| db_name | tbl_name | create_time | owner | COLUMN_STATS_ACCURATE | last_modified_by | last_modified_time | numFiles | numRows | rawDataSize | totalSize | transient_lastDdlTime |+---------+----------+---------------------+----------+-----------------------+------------------+---------------------+----------+---------+-------------+-----------+-----------------------+| local | t100k | 2017-04-19 23:25:20 | cloudera | true | cloudera | 2017-04-19 23:27:28 | 1 | 100000 | 488895 | 588895 | 2017-04-20 01:12:55 || local | t10k | 2017-04-19 23:25:26 | cloudera | true | cloudera | 2017-04-19 23:27:26 | 1 | 10000 | 38894 | 48894 | 2017-04-20 01:12:58 || local | t1k | 2017-04-19 23:25:30 | cloudera | true | cloudera | 2017-04-19 23:27:22 | 1 | 1000 | 2893 | 3893 | 2017-04-20 01:13:03 || local | t1m | 2017-04-19 23:20:59 | cloudera | true | cloudera | 2017-04-19 23:27:30 | 1 | 1000000 | 5888896 | 6888896 | 2017-04-20 01:12:48 |+---------+----------+---------------------+----------+-----------------------+------------------+---------------------+----------+---------+-------------+-----------+-----------------------+