Extract hive table partition in Spark - java

后端 未结 1 1614
粉色の甜心
粉色の甜心 2021-01-21 16:45

Is there any way in Spark to extract only the partition column names? The workaround I am using is to run "show extended table like table_name" using HiveContext.

相关标签:
1条回答
  • 2021-01-21 17:25

    You can use class HiveMetaStoreClient to query directly from HiveMetaStore.

    This class is also widely used by other popular projects that interact with the Hive metastore, for example Apache Drill.

    org.apache.hadoop.hive.metastore.api.Partition getPartition(String db_name, String tbl_name, List<String> part_vals)

    org.apache.hadoop.hive.metastore.api.Partition getPartition(String db, String tableName, String partName)

    Map<String, List<ColumnStatisticsObj>> getPartitionColumnStatistics(String dbName, String tableName, List<String> partNames, List<String> colNames)

    Gets the column statistics of partitions, given dbName, tableName, a list of partition names and a list of column names.

    List<Partition> getPartitionsByNames(String db_name, String tbl_name, List<String> part_names)

    Gets partitions by a list of partition names.

    There are also list methods:

    List<String> listPartitionNames(String db_name, String tbl_name, List<String> part_vals, short max_parts)

    List<String> listPartitionNames(String dbName, String tblName, short max)

    List<Partition> listPartitions(String db_name, String tbl_name, List<String> part_vals, short max_parts)

    List<Partition> listPartitions(String db_name, String tbl_name, short max_parts)

    Sample code snippet 1:

    import org.apache.hadoop.hive.conf.HiveConf;
    
    // test program
    /**
     * Minimal driver for {@code HiveMetaStoreConnector}: builds a {@link HiveConf} that
     * points at a metastore and prints the partition listing of every table in one database.
     *
     * <p>The metastore URI and the database name below are placeholders; replace them
     * with real values before running.
     */
    public class Test {
        public static void main(String[] args){

            HiveConf hiveConf = new HiveConf();
            hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
            hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://host:port");

            // getAllPartitionInfo takes a DATABASE name and walks all of its tables,
            // so the placeholder is named accordingly.
            String dbName = "dbname";

            // A constructor call never evaluates to null, so no null check is needed here.
            HiveMetaStoreConnector hiveMetaStoreConnector = new HiveMetaStoreConnector(hiveConf);
            System.out.print(hiveMetaStoreConnector.getAllPartitionInfo(dbName));
        }
    }
    
    
    // define a class like this
    
    import com.google.common.base.Joiner;
    import com.google.common.collect.Lists;
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    import org.apache.hadoop.hive.metastore.api.FieldSchema;
    import org.apache.hadoop.hive.metastore.api.MetaException;
    import org.apache.hadoop.hive.metastore.api.Partition;
    import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
    import org.apache.hadoop.hive.ql.metadata.Hive;
    import org.apache.thrift.TException;
    import org.joda.time.DateTime;
    import org.joda.time.format.DateTimeFormatter;
    
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    
    /**
     * Small wrapper around {@link HiveMetaStoreClient} that renders partition and column
     * metadata of Hive tables as tab-separated text lines.
     *
     * <p>Errors while connecting, listing tables or reading columns are treated as fatal:
     * they are printed and the JVM is terminated with {@code System.exit(-100)}. Errors
     * while listing the partitions of a single table are reported as one error line for
     * that table instead.
     */
    public class HiveMetaStoreConnector {
        /** Upper bound handed to the metastore when listing the partitions of one table. */
        private static final short MAX_PARTITIONS = (short) 10000;

        /** Number of partition-value columns every partition line is padded to. */
        private static final int PARTITION_VALUE_COLUMNS = 4;

        private HiveConf hiveConf;
        HiveMetaStoreClient hiveMetaStoreClient;

        /**
         * Connects to the metastore at {@code msAddr + ":" + msPort}.
         *
         * <p>NOTE(review): the concatenated value is stored verbatim as the metastore URI,
         * so {@code msAddr} presumably has to carry the scheme itself
         * (e.g. {@code "thrift://host"}) - confirm against your Hive version.
         *
         * @param msAddr metastore address
         * @param msPort metastore port
         */
        public HiveMetaStoreConnector(String msAddr, String msPort){
            this(confFor(msAddr, msPort));
        }

        /**
         * Connects to the metastore described by the given configuration. On a
         * {@link MetaException} the error is printed and the JVM exits with status -100.
         *
         * @param hiveConf configuration used to create the metastore client
         */
        public HiveMetaStoreConnector(HiveConf hiveConf){
            try {
                this.hiveConf = hiveConf;
                hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf);
            } catch (MetaException e) {
                e.printStackTrace();
                System.err.println("Constructor error");
                System.err.println(e.toString());
                System.exit(-100);
            }
        }

        /** Builds a fresh configuration whose metastore URI is {@code msAddr:msPort}. */
        private static HiveConf confFor(String msAddr, String msPort){
            HiveConf conf = new HiveConf();
            conf.setVar(HiveConf.ConfVars.METASTOREURIS, msAddr+":"+ msPort);
            return conf;
        }

        /**
         * Closes the underlying metastore client. Call this once the connector is no
         * longer needed so the connection is not left open.
         */
        public void close(){
            if (hiveMetaStoreClient != null) {
                hiveMetaStoreClient.close();
            }
        }

        /**
         * Collects the partition lines of every table in a database.
         *
         * @param dbName database whose tables are inspected
         * @return all lines produced by {@link #getTablePartitionInformation} for each table,
         *         joined with newlines
         */
        public String getAllPartitionInfo(String dbName){
            List<String> res = Lists.newArrayList();
            try {
                List<String> tableList = hiveMetaStoreClient.getAllTables(dbName);
                for(String tableName:tableList){
                    res.addAll(getTablePartitionInformation(dbName,tableName));
                }
            } catch (MetaException e) {
                e.printStackTrace();
                // Message now names this method (it used to say "getAllTableStatistic").
                System.out.println("getAllPartitionInfo error");
                System.out.println(e.toString());
                System.exit(-100);
            }

            return Joiner.on("\n").join(res);
        }

        /**
         * Describes the partitions of one table, one line per partition:
         * table name, the partition values padded with the text {@code "null"} up to
         * {@value #PARTITION_VALUE_COLUMNS} columns, and the creation time formatted as
         * {@code yyyy-MM-dd HH:mm:ss}, all separated by tabs.
         *
         * @param dbName    database containing the table
         * @param tableName table whose partitions are listed (at most {@value #MAX_PARTITIONS})
         * @return the partition lines, or a single error line if the metastore request fails
         */
        public List<String> getTablePartitionInformation(String dbName, String tableName){
            List<String> partitionsInfo = Lists.newArrayList();
            try {
                List<Partition> partitions = hiveMetaStoreClient.listPartitions(dbName,tableName, MAX_PARTITIONS);
                for(Partition partition:partitions){
                    // Pad a copy: appending to partition.getValues() directly would
                    // modify the Partition object itself.
                    List<String> partitionValues = Lists.newArrayList(partition.getValues());
                    while(partitionValues.size() < PARTITION_VALUE_COLUMNS){
                        partitionValues.add("null");
                    }

                    // getCreateTime() is multiplied by 1000 to obtain the millisecond
                    // value DateTime expects; widen to long first to avoid int overflow.
                    DateTime createDate = new DateTime((long)partition.getCreateTime()*1000);

                    StringBuilder sb = new StringBuilder();
                    sb.append(tableName);
                    sb.append("\t");
                    sb.append(Joiner.on("\t").join(partitionValues));
                    sb.append("\t");
                    sb.append(createDate.toString("yyyy-MM-dd HH:mm:ss"));
                    partitionsInfo.add(sb.toString());
                }

            } catch (TException e) {
                e.printStackTrace();
                return Arrays.asList("error for request on " + tableName);
            }

            return partitionsInfo;
        }

        /**
         * Collects the column lines of every table in a database.
         *
         * @param dbName database whose tables are inspected
         * @return all lines produced by {@link #getTableColumnsInformation} for each table,
         *         joined with newlines
         */
        public String getAllTableStatistic(String dbName){
            List<String> res = Lists.newArrayList();
            try {
                List<String> tableList = hiveMetaStoreClient.getAllTables(dbName);
                for(String tableName:tableList){
                    res.addAll(getTableColumnsInformation(dbName,tableName));
                }
            } catch (MetaException e) {
                e.printStackTrace();
                System.out.println("getAllTableStatistic error");
                System.out.println(e.toString());
                System.exit(-100);
            }

            return Joiner.on("\n").join(res);
        }

        /**
         * Describes the columns of one table, one line per column:
         * table name, zero-based column index, column name, type and comment,
         * separated by tabs.
         *
         * @param dbName    database containing the table
         * @param tableName table whose fields are read
         * @return the column lines; on a metastore error the JVM exits with status -100
         */
        public List<String> getTableColumnsInformation(String dbName, String tableName){
            try {
                List<FieldSchema> fields = hiveMetaStoreClient.getFields(dbName, tableName);
                List<String> infs = Lists.newArrayList();
                int cnt = 0;
                for(FieldSchema fs : fields){
                    StringBuilder sb = new StringBuilder();
                    sb.append(tableName);
                    sb.append("\t");
                    sb.append(cnt);
                    sb.append("\t");
                    cnt++;
                    sb.append(fs.getName());
                    sb.append("\t");
                    sb.append(fs.getType());
                    sb.append("\t");
                    sb.append(fs.getComment());
                    infs.add(sb.toString());
                }

                return infs;

            } catch (TException e) {
                e.printStackTrace();
                System.out.println("getTableColumnsInformation error");
                System.out.println(e.toString());
                System.exit(-100);
                // Only here to satisfy the compiler; System.exit does not return normally.
                return null;
            }
        }
    }
    

    Sample code snippet 2 (source):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    import org.apache.hadoop.hive.metastore.api.Database;
    import org.apache.hadoop.hive.metastore.api.MetaException;
    import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
    import org.apache.hive.hcatalog.common.HCatUtil;
    import org.apache.thrift.TException;
    
    import javax.xml.crypto.Data;
    import java.io.IOException;
    import java.util.HashMap;
    
    /**
     * Command-line example for {@link HiveMetaStoreClient}: prints the location, owner and
     * parameters of the database named by the first argument, creating the database
     * first if the metastore reports that it does not exist.
     */
    public class HiveMetaStoreClientTest {
        /**
         * @param args {@code args[0]} is the name of the database to look up
         */
        public static void main(String[] args) {

            // Fail with a usage hint instead of an ArrayIndexOutOfBoundsException.
            if (args.length < 1) {
                System.err.println("usage: HiveMetaStoreClientTest <database-name>");
                return;
            }

            String dbName = args[0];
            HiveMetaStoreClient hiveMetaStoreClient = null;

            try {
                HiveConf hiveConf = HCatUtil.getHiveConf(new Configuration());
                hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf);

                getDatabase(hiveMetaStoreClient, dbName);


            } catch (MetaException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            } catch (NoSuchObjectException e) {
                e.printStackTrace();
                System.out.println("===============");
                System.out.println("database " + dbName + " does not exist");
                System.out.println("===============");
                // Create the missing database, then read it back to show the result.
                createDatabase(hiveMetaStoreClient, dbName);
                try {
                    getDatabase(hiveMetaStoreClient, dbName);
                } catch (TException e1) {
                    e1.printStackTrace();
                    System.out.println("failed to read database " + dbName + " after creating it");
                }
            } catch (TException e) {
                e.printStackTrace();
            } finally {
                // Release the metastore connection on every path.
                if (hiveMetaStoreClient != null) {
                    hiveMetaStoreClient.close();
                }
            }
        }

        /**
         * Fetches a database from the metastore and prints its location URI, owner name
         * and each of its parameters as {@code key = value}.
         *
         * @param hiveMetaStoreClient connected metastore client
         * @param dbName              name of the database to fetch
         * @return the fetched database
         * @throws TException if the metastore request fails
         */
        public static Database getDatabase(HiveMetaStoreClient hiveMetaStoreClient, String dbName) throws TException {
            Database database = hiveMetaStoreClient.getDatabase(dbName);

            System.out.println(database.getLocationUri());
            System.out.println(database.getOwnerName());

            // Guard against an unset parameter map before iterating.
            if (database.getParameters() != null) {
                for (String key : database.getParameters().keySet()) {
                    System.out.println(key + " = " + database.getParameters().get(key));
                }
            }
            return database;
        }

        /**
         * Asks the metastore to create a database with the given name, the description
         * {@code "desc"}, no explicit location and an empty parameter map. A failure is
         * printed, not rethrown.
         *
         * @param hiveMetaStoreClient connected metastore client
         * @param dbName              name of the database to create
         * @return the {@link Database} object that was submitted, even if creation failed
         */
        public static Database createDatabase(HiveMetaStoreClient hiveMetaStoreClient, String dbName) {
            HashMap<String, String> map = new HashMap<String,String>();
            Database database = new Database(dbName, "desc", null, map);
            try {
                hiveMetaStoreClient.createDatabase(database);
            } catch (TException e) {
                e.printStackTrace();
                System.out.println("some error");
            }
            return database;
        }
    }
    
    0 讨论(0)
提交回复
热议问题