Hive: how do I retrieve the columns of all tables in a database?

前端 未结 2 1514
暗喜
暗喜 2020-12-18 03:32

I want to write the equivalent of this sql request in Hive :

select * from information_schema.columns where table_schema='database_name'
相关标签:
2条回答
  • 2020-12-18 04:01

    How can I access hive's metastore and retrieve all the columns of all the tables stored in a specific database?

    One way is to connect through HiveMetaStoreClient; the getTableColumnsInformation method shown below returns the columns of a table.

    Besides columns, the same class can extract other metadata such as partitions. Please see the example client and the sample methods below.

    import org.apache.hadoop.hive.conf.HiveConf;
    
    // test program
    /**
     * Minimal demo client: configures a {@link HiveConf} that points at a remote
     * metastore, builds a {@code HiveMetaStoreConnector} from it and prints the
     * partition listing of one database.
     */
    public class Test {
        /**
         * Entry point of the demo.
         *
         * @param args command-line arguments (unused)
         */
        public static void main(String[] args){

            HiveConf hiveConf = new HiveConf();
            hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
            // Replace host:port with the address of your metastore Thrift service.
            hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://host:port");

            // "new" never yields null, so no null check is needed here.
            HiveMetaStoreConnector hiveMetaStoreConnector = new HiveMetaStoreConnector(hiveConf);

            // getAllPartitionInfo expects a DATABASE name (it iterates over every
            // table of that database), not a table name.
            // To list columns instead, call getAllTableStatistic with the same argument.
            System.out.print(hiveMetaStoreConnector.getAllPartitionInfo("database_name"));
        }
    }
    
    
    // define a class like this
    
    import com.google.common.base.Joiner;
    import com.google.common.collect.Lists;
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    import org.apache.hadoop.hive.metastore.api.FieldSchema;
    import org.apache.hadoop.hive.metastore.api.MetaException;
    import org.apache.hadoop.hive.metastore.api.Partition;
    import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
    import org.apache.hadoop.hive.ql.metadata.Hive;
    import org.apache.thrift.TException;
    import org.joda.time.DateTime;
    import org.joda.time.format.DateTimeFormatter;
    
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    
    /**
     * Thin wrapper around {@link HiveMetaStoreClient} that renders table metadata
     * (columns and partitions) of a database as tab-separated text lines.
     *
     * <p>Connection failures and metastore errors in the "all tables" methods and
     * in {@link #getTableColumnsInformation} terminate the JVM with exit code -100.
     */
    public class HiveMetaStoreConnector {
        /** URI scheme prepended to the metastore address when it is missing. */
        private static final String THRIFT_SCHEME = "thrift://";
        /** Upper bound on the number of partitions requested per table. */
        private static final short MAX_PARTITIONS = 10000;
        /** Partition value lists are padded with "null" up to this many entries. */
        private static final int PARTITION_VALUE_COLUMNS = 4;

        private HiveConf hiveConf;
        HiveMetaStoreClient hiveMetaStoreClient;

        /**
         * Connects to the metastore at the given address and port.
         *
         * @param msAddr metastore host, with or without a leading {@code thrift://}
         * @param msPort metastore Thrift port
         */
        public HiveMetaStoreConnector(String msAddr, String msPort){
            try {
                hiveConf = new HiveConf();
                hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, toMetastoreUri(msAddr, msPort));
                hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf);
            } catch (MetaException e) {
                e.printStackTrace();
                System.err.println("Constructor error");
                System.err.println(e.toString());
                System.exit(-100);
            }
        }

        /**
         * Connects to the metastore described by an already populated configuration.
         *
         * @param hiveConf configuration handed to {@link HiveMetaStoreClient}
         */
        public HiveMetaStoreConnector(HiveConf hiveConf){
            try {
                this.hiveConf = hiveConf;
                hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf);
            } catch (MetaException e) {
                e.printStackTrace();
                System.err.println("Constructor error");
                System.err.println(e.toString());
                System.exit(-100);
            }
        }

        /** Builds the metastore URI, adding the thrift:// scheme when the address lacks it. */
        private static String toMetastoreUri(String msAddr, String msPort){
            String hostAndPort = msAddr + ":" + msPort;
            return msAddr.startsWith(THRIFT_SCHEME) ? hostAndPort : THRIFT_SCHEME + hostAndPort;
        }

        /**
         * Releases the underlying metastore connection. Safe to call when the
         * client was never created.
         */
        public void close(){
            if(hiveMetaStoreClient != null){
                hiveMetaStoreClient.close();
            }
        }

        /**
         * Lists the partitions of every table in a database.
         *
         * @param dbName database whose tables are inspected
         * @return the lines produced by {@link #getTablePartitionInformation} for
         *         each table, joined with newlines
         */
        public String getAllPartitionInfo(String dbName){
            List<String> res = Lists.newArrayList();
            try {
                List<String> tableList = hiveMetaStoreClient.getAllTables(dbName);
                for(String tableName:tableList){
                    res.addAll(getTablePartitionInformation(dbName,tableName));
                }
            } catch (MetaException e) {
                e.printStackTrace();
                System.out.println("getAllPartitionInfo error");
                System.out.println(e.toString());
                System.exit(-100);
            }

            return Joiner.on("\n").join(res);
        }

        /**
         * Describes up to {@code MAX_PARTITIONS} partitions of one table.
         *
         * <p>Each line is: table name, the partition values (padded with the text
         * "null" up to four entries) and the partition creation time formatted as
         * {@code yyyy-MM-dd HH:mm:ss}, all tab-separated.
         *
         * @param dbName    database containing the table
         * @param tableName table whose partitions are listed
         * @return one line per partition, or a single-element list holding an error
         *         message when the metastore request fails
         */
        public List<String> getTablePartitionInformation(String dbName, String tableName){
            List<String> partitionsInfo = Lists.newArrayList();
            try {
                List<Partition> partitions = hiveMetaStoreClient.listPartitions(dbName,tableName, MAX_PARTITIONS);
                for(Partition partition:partitions){
                    // Pad a copy so the Partition object itself is left untouched.
                    List<String> partitionValues = new ArrayList<String>(partition.getValues());
                    while(partitionValues.size() < PARTITION_VALUE_COLUMNS){
                        partitionValues.add("null");
                    }

                    StringBuilder sb = new StringBuilder();
                    sb.append(tableName);
                    sb.append("\t");
                    sb.append(Joiner.on("\t").join(partitionValues));
                    sb.append("\t");
                    // getCreateTime() is multiplied by 1000 because DateTime takes milliseconds;
                    // widen to long first to avoid int overflow.
                    DateTime createDate = new DateTime((long)partition.getCreateTime()*1000);
                    sb.append(createDate.toString("yyyy-MM-dd HH:mm:ss"));
                    partitionsInfo.add(sb.toString());
                }

            } catch (TException e) {
                e.printStackTrace();
                return Arrays.asList("error for request on " + tableName);
            }

            return partitionsInfo;
        }

        /**
         * Lists the columns of every table in a database.
         *
         * @param dbName database whose tables are inspected
         * @return the lines produced by {@link #getTableColumnsInformation} for
         *         each table, joined with newlines
         */
        public String getAllTableStatistic(String dbName){
            List<String> res = Lists.newArrayList();
            try {
                List<String> tableList = hiveMetaStoreClient.getAllTables(dbName);
                for(String tableName:tableList){
                    res.addAll(getTableColumnsInformation(dbName,tableName));
                }
            } catch (MetaException e) {
                e.printStackTrace();
                System.out.println("getAllTableStatistic error");
                System.out.println(e.toString());
                System.exit(-100);
            }

            return Joiner.on("\n").join(res);
        }

        /**
         * Describes the columns of one table.
         *
         * <p>Each line is: table name, zero-based column position, column name,
         * column type and column comment, all tab-separated.
         *
         * @param dbName    database containing the table
         * @param tableName table whose columns are listed
         * @return one line per column, in the order returned by the metastore
         */
        public List<String> getTableColumnsInformation(String dbName, String tableName){
            try {
                List<FieldSchema> fields = hiveMetaStoreClient.getFields(dbName, tableName);
                List<String> infs = Lists.newArrayList();
                int cnt = 0;
                for(FieldSchema fs : fields){
                    StringBuilder sb = new StringBuilder();
                    sb.append(tableName);
                    sb.append("\t");
                    sb.append(cnt);
                    sb.append("\t");
                    cnt++;
                    sb.append(fs.getName());
                    sb.append("\t");
                    sb.append(fs.getType());
                    sb.append("\t");
                    sb.append(fs.getComment());
                    infs.add(sb.toString());
                }

                return infs;

            } catch (TException e) {
                e.printStackTrace();
                System.out.println("getTableColumnsInformation error");
                System.out.println(e.toString());
                System.exit(-100);
                // Not reached at runtime; required so the method compiles.
                return null;
            }
        }
    }
    
    0 讨论(0)
  • 2020-12-18 04:06

    If you want to be able to run queries that return Hive metadata, you can set up the Hive metastore on MySQL; the metadata used by Hive is then stored under a dedicated MySQL account.

    You will have to create a user of MySQL for hive by doing CREATE USER 'hive'@'metastorehost' IDENTIFIED BY 'mypassword'.

    Then you will find tables like COLUMNS_V2 with the info you are looking for.

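    An example query to retrieve all columns in all tables could be: SELECT c.COLUMN_NAME, t.TBL_NAME FROM TBLS t JOIN SDS s ON t.SD_ID = s.SD_ID JOIN COLUMNS_V2 c ON s.CD_ID = c.CD_ID (columns are linked to tables through the storage descriptor table SDS, not directly through TBL_ID).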

    Alternatively, you can access this information via REST calls to WebHCat; see the wiki for more info.

    0 讨论(0)
提交回复
热议问题