Impala/Hive to get list of tables along with its size Impala/Hive to get list of tables along with its size hadoop hadoop

Impala/Hive to get list of tables along with its size


Hive

CLI

show table extended like '.*'

tableName:t100kowner:clouderalocation:file:/home/cloudera/local/t100kinputformat:org.apache.hadoop.mapred.TextInputFormatoutputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormatcolumns:struct columns { i32 i}partitioned:falsepartitionColumns:totalNumberFiles:1totalFileSize:588895maxFileSize:588895minFileSize:588895lastAccessTime:0lastUpdateTime:1492675975000tableName:t10kowner:clouderalocation:file:/home/cloudera/local/t10kinputformat:org.apache.hadoop.mapred.TextInputFormatoutputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormatcolumns:struct columns { i32 i}partitioned:falsepartitionColumns:totalNumberFiles:1totalFileSize:48894maxFileSize:48894minFileSize:48894lastAccessTime:0lastUpdateTime:1492675978000tableName:t1kowner:clouderalocation:file:/home/cloudera/local/t1kinputformat:org.apache.hadoop.mapred.TextInputFormatoutputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormatcolumns:struct columns { i32 i}partitioned:falsepartitionColumns:totalNumberFiles:1totalFileSize:3893maxFileSize:3893minFileSize:3893lastAccessTime:0lastUpdateTime:1492675983000tableName:t1mowner:clouderalocation:file:/home/cloudera/local/t1minputformat:org.apache.hadoop.mapred.TextInputFormatoutputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormatcolumns:struct columns { i32 i}partitioned:falsepartitionColumns:totalNumberFiles:1totalFileSize:6888896maxFileSize:6888896minFileSize:6888896lastAccessTime:0lastUpdateTime:1492675968000

Metastore (e.g. MySql)

select    d.name                                                                                         as db_name         ,t.tbl_name                                                                                     as tbl_name         ,from_unixtime(min(t.create_time))                                                              as create_time         ,min(t.owner)                                                                                   as owner         ,min(case when tp.param_key = 'COLUMN_STATS_ACCURATE'  then tp.param_value                 end) as COLUMN_STATS_ACCURATE         ,min(case when tp.param_key = 'last_modified_by'       then tp.param_value                 end) as last_modified_by         ,min(case when tp.param_key = 'last_modified_time'     then from_unixtime(tp.param_value)  end) as last_modified_time           ,min(case when tp.param_key = 'numFiles'               then tp.param_value                 end) as numFiles         ,min(case when tp.param_key = 'numRows'                then tp.param_value                 end) as numRows         ,min(case when tp.param_key = 'rawDataSize'            then tp.param_value                 end) as rawDataSize         ,min(case when tp.param_key = 'totalSize'              then tp.param_value                 end) as totalSize         ,min(case when tp.param_key = 'transient_lastDdlTime'  then from_unixtime(tp.param_value)  end) as transient_lastDdlTime         from            metastore.DBS           as d          join  metastore.TBLS          as t          on    t.db_id = d.db_id          join  metastore.TABLE_PARAMS  as tp          on    tp.tbl_id = t.tbl_id          group by  d.name         ,t.tbl_name         order by  d.name         ,t.tbl_name                  

+---------+----------+---------------------+----------+-----------------------+------------------+---------------------+----------+---------+-------------+-----------+-----------------------+| db_name | tbl_name |     create_time     |  owner   | COLUMN_STATS_ACCURATE | last_modified_by | last_modified_time  | numFiles | numRows | rawDataSize | totalSize | transient_lastDdlTime |+---------+----------+---------------------+----------+-----------------------+------------------+---------------------+----------+---------+-------------+-----------+-----------------------+| local   | t100k    | 2017-04-19 23:25:20 | cloudera | true                  | cloudera         | 2017-04-19 23:27:28 |        1 |  100000 |      488895 |    588895 | 2017-04-20 01:12:55   || local   | t10k     | 2017-04-19 23:25:26 | cloudera | true                  | cloudera         | 2017-04-19 23:27:26 |        1 |   10000 |       38894 |     48894 | 2017-04-20 01:12:58   || local   | t1k      | 2017-04-19 23:25:30 | cloudera | true                  | cloudera         | 2017-04-19 23:27:22 |        1 |    1000 |        2893 |      3893 | 2017-04-20 01:13:03   || local   | t1m      | 2017-04-19 23:20:59 | cloudera | true                  | cloudera         | 2017-04-19 23:27:30 |        1 | 1000000 |     5888896 |   6888896 | 2017-04-20 01:12:48   |+---------+----------+---------------------+----------+-----------------------+------------------+---------------------+----------+---------+-------------+-----------+-----------------------+