大数据(HBase-编程java api)

半世苍凉 提交于 2020-03-02 05:21:08

开发环境搭建步骤

1:解压下载下来的hbase的安装包

2:配置windows的hosts文件,文件位置:C:\Windows\System32\drivers\etc\hosts
    配置虚拟机的ip对应的主机名
        比如:
    192.168.153.115 hm02
    192.168.153.116 hs0201
    192.168.153.117 hs0202

3:下载hbase集群中的hbase-site.xml文件
    通过远程工具的ftp功能来进行下载

4:使用开发工具创建hbase客户端工程

    1)将hbase-site.xml以及log4j.properties文件放入工程的classpath(例如src目录下)
    2)将解压后的hbase的lib目录下的jar包导入工程
    3)写一个测试程序并运行,在运行之前确保我们的hbase的集群已经启动

HbaseJavaTest.java

package com.hbase;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Small collection of HBase client examples: put, get, scan, delete,
 * and table administration.
 *
 * <p>All examples share one {@link Connection} that is opened when the class
 * is loaded and closed at the end of {@link #main(String[])}. Every
 * {@link Table}, {@link Admin} and {@link ResultScanner} is opened in a
 * try-with-resources statement so it is released even when an operation fails.
 */
public class HbaseJavaTest {
	/** Table used by the examples that do not take a table name. */
	private static final TableName PEOPLE = TableName.valueOf("people") ;

	/* Shared client state, initialised once in the static block below. */
	private static final Configuration conf ;
	private static final Connection conn ;
	static {
		conf = HBaseConfiguration.create() ;
		// ZooKeeper quorum of the cluster. These host names must be resolvable
		// from the client machine; they match the hosts file and hbase-site.xml
		// that accompany this example.
		conf.set("hbase.zookeeper.quorum", "hm02,hs0201,hs0202");
		try {
			conn = ConnectionFactory.createConnection(conf) ;
		} catch (IOException e) {
			// Fail fast: without a connection every method below would only
			// fail later with a NullPointerException that hides the real cause.
			throw new ExceptionInInitializerError(e) ;
		}
	}

	/**
	 * Inserts a single row ({@code p00001}, {@code cinfo:name = xiaoshi}) into
	 * the {@code people} table.
	 *
	 * <p>For a bulk insert, collect the {@link Put} objects in a
	 * {@code List<Put>} and pass the list to {@code Table.put(List<Put>)}
	 * instead of putting them one at a time.
	 *
	 * @throws Exception if the table cannot be opened or the write fails
	 */
	public static void addOneData() throws Exception{
		Put put = new Put(Bytes.toBytes("p00001")) ;
		put.addColumn(Bytes.toBytes("cinfo"), Bytes.toBytes("name"), Bytes.toBytes("xiaoshi")) ;

		try (Table ht = conn.getTable(PEOPLE)) {
			ht.put(put);
		}
		System.out.println("===========数据插入成功===========");
	}

	/**
	 * Prints name, block size and version settings of every column family of
	 * the given table.
	 *
	 * @param tableName name of the table to describe
	 * @throws Exception if the table cannot be opened or its descriptor cannot be read
	 */
	public static void getTableDesc(String tableName) throws Exception{
		try (Table ht = conn.getTable(TableName.valueOf(tableName))) {
			HTableDescriptor td = ht.getTableDescriptor();
			for(HColumnDescriptor hd : td.getColumnFamilies()){
				System.out.println("name : " + hd.getNameAsString() +
						" blocksize : " + hd.getBlocksize() +
						" minVers : " + hd.getMinVersions() +
						" maxVers : " + hd.getMaxVersions() +
						" defVers : " + HColumnDescriptor.DEFAULT_VERSIONS);
			}
		}
	}

	/**
	 * Scans the whole table and prints every cell of every row.
	 *
	 * @param tableName name of the table to scan
	 * @throws Exception if the table cannot be opened or the scan fails
	 */
	public static void scanTable(String tableName) throws Exception{
		// The scanner is closed together with the table once the loop ends.
		try (Table ht = conn.getTable(TableName.valueOf(tableName));
				ResultScanner scanner = ht.getScanner(new Scan())) {
			// Each Result holds the cells of one row.
			for(Result res : scanner){
				for(Cell cell : res.listCells()){
					printCell(res.getRow(), cell) ;
				}
			}
		}
	}

	/**
	 * Scans the row-key range from {@code Ande0000500} to {@code Ande0000600}
	 * of the {@code people} table and prints the {@code cinfo:name} value of
	 * each row returned.
	 *
	 * @throws Exception if the table cannot be opened or the scan fails
	 */
	public static void scanForRange() throws Exception{
		Scan scan = new Scan(Bytes.toBytes("Ande0000500"), Bytes.toBytes("Ande0000600")) ;
		try (Table ht = conn.getTable(PEOPLE);
				ResultScanner scanner = ht.getScanner(scan)) {
			for(Result rs : scanner){
				String res = Bytes.toString(rs.getValue(Bytes.toBytes("cinfo"), Bytes.toBytes("name"))) ;
				System.out.println(res);
			}
		}
	}

	/**
	 * Fetches one row of the {@code people} table by row key and prints all of
	 * its cells, or a notice when the row does not exist.
	 *
	 * @param rowkey row key to look up
	 * @throws Exception if the table cannot be opened or the read fails
	 */
	public static void getForRowKey(String rowkey) throws Exception{
		try (Table ht = conn.getTable(PEOPLE)) {
			Result result = ht.get(new Get(Bytes.toBytes(rowkey)));
			if( result == null || result.size() == 0){
				System.out.println("没有这个rowkey");
				return ;
			}
			for(Cell cell : result.listCells()){
				printCell(result.getRow(), cell) ;
			}
		}
	}

	/**
	 * Deletes the whole row with the given key from the {@code people} table.
	 *
	 * @param rowkey row key to delete
	 * @throws Exception if the table cannot be opened or the delete fails
	 */
	public static void deleteRow(String rowkey) throws Exception{
		try (Table ht = conn.getTable(PEOPLE)) {
			ht.delete(new Delete(Bytes.toBytes(rowkey)));
		}
	}

	/**
	 * Adds the column family {@code age} to the {@code people} table.
	 *
	 * <p>The table is disabled for the change and re-enabled afterwards, even
	 * when the modification itself fails, so a failed change does not leave
	 * the table offline.
	 *
	 * @throws Exception if any of the administrative operations fails
	 */
	public static void alterTableAddCls() throws Exception {
		try (Admin admin = conn.getAdmin()) {
			admin.disableTable(PEOPLE);
			try {
				// Fetch the current descriptor, extend it, and submit it back.
				HTableDescriptor td = admin.getTableDescriptor(PEOPLE);
				td.addFamily(new HColumnDescriptor(Bytes.toBytes("age")));
				admin.modifyTable(PEOPLE, td);
			} finally {
				admin.enableTable(PEOPLE);
			}
		}
		System.out.println("====添加列族成功====");
	}

	/**
	 * Drops the {@code people} table if it exists.
	 *
	 * @throws Exception if any of the administrative operations fails
	 */
	public static void deleteTable() throws Exception{
		try (Admin admin = conn.getAdmin()) {
			if(admin.tableExists(PEOPLE)){
				// A table must be disabled before it can be deleted; disabling
				// an already disabled table would throw, so check first.
				if(admin.isTableEnabled(PEOPLE)){
					admin.disableTable(PEOPLE);
				}
				admin.deleteTable(PEOPLE);
				System.out.println("删除表成功");
			}
		}
	}

	/** Prints row key, family, qualifier, timestamp and value of one cell. */
	private static void printCell(byte[] row, Cell cell) {
		System.out.println("================================================");
		System.out.println("行键:rowkey ===========" + Bytes.toString(row));
		System.out.println("列族:columnFam ========" + Bytes.toString(CellUtil.cloneFamily(cell)));
		System.out.println("列:column ============" + Bytes.toString(CellUtil.cloneQualifier(cell)));
		System.out.println("时间戳:timestamp =======" + cell.getTimestamp());
		System.out.println("值:value ==============" + Bytes.toString(CellUtil.cloneValue(cell)));
	}

	/**
	 * Entry point. Uncomment the example to run; the shared connection is
	 * closed afterwards even when the example throws.
	 */
	public static void main(String[] args) throws Exception {
		try {
			// Print the table description
//			getTableDesc("people") ;
			// Insert a single row
//			addOneData() ;
			// Scan the whole table
//			scanTable("people") ;
			// Scan a row-key range
//			scanForRange() ;
			// Fetch a single row
//			getForRowKey("Ande0000500") ;
			// Add a column family
//			alterTableAddCls() ;
		} finally {
			conn.close();
		}
	}
}

HbaseTest.java

package com.dongnao;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

/**
 * Creates the {@code people} table with a single column family {@code cinfo}
 * (at most 3 versions per cell). If the table already exists it is dropped
 * instead, so running the program twice toggles the table.
 */
public class HbaseTest {

	public static void main(String[] args) throws Exception {
		/*
		 * Configuration carries the client settings, e.g. the ZooKeeper
		 * quorum address and port.
		 */
		Configuration conf = HBaseConfiguration.create() ;
		conf.set("hbase.zookeeper.quorum", "hm02,hs0201,hs0202");

		/*
		 * HTableDescriptor  describes a table.
		 * TableName         wraps a table name string in the form HBase expects.
		 * HColumnDescriptor describes a column family (versions, compression, ...).
		 * Put               is used when adding data; rows can be added one at a
		 *                   time or in bulk by passing a list of Put objects.
		 */
		TableName people = TableName.valueOf("people") ;
		HTableDescriptor table = new HTableDescriptor(people) ;
		HColumnDescriptor cf = new HColumnDescriptor("cinfo") ;
		cf.setMaxVersions(3) ;
		table.addFamily(cf) ;

		/*
		 * Connection is the link to the cluster; Admin is the interface for
		 * creating, deleting, enabling and disabling tables (it replaces the
		 * deprecated HBaseAdmin). Both are closed automatically, in reverse
		 * order, even when an operation fails.
		 */
		try (Connection conn = ConnectionFactory.createConnection(conf);
				Admin admin = conn.getAdmin()) {
			if(!admin.tableExists(people)){
				admin.createTable(table);
				// Report success only on the branch that actually created the table.
				System.out.println("===表创建成功===");
			}else{
				// A table must be disabled before deletion; disabling an
				// already disabled table would throw, so check first.
				if(admin.isTableEnabled(people)){
					admin.disableTable(people);
				}
				admin.deleteTable(people);
				System.out.println("该表已经存在,删除成功!");
			}
		}
	}

}

hbase-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-->
<!-- Client-side copy of the cluster's HBase settings; place it on the client classpath. -->
<configuration>
	<!-- Root directory of HBase data: the /hbase path on the HDFS instance at hm02:9000. -->
	<property>
		<name>hbase.rootdir</name>
		<value>hdfs://hm02:9000/hbase</value>
	</property>
	<!-- true = the cluster runs in distributed mode rather than standalone. -->
	<property>
		<name>hbase.cluster.distributed</name>
		<value>true</value>
	</property>
	<!-- Comma-separated ZooKeeper hosts; each name must resolve on the client (see the hosts file). -->
	<property>
		<name>hbase.zookeeper.quorum</name>
		<value>hm02,hs0201,hs0202</value>
	</property>
</configuration>

hosts

# Copyright (c) 1993-2009 Microsoft Corp.
#
# This is a sample HOSTS file used by Microsoft TCP/IP for Windows.
#
# This file contains the mappings of IP addresses to host names. Each
# entry should be kept on an individual line. The IP address should
# be placed in the first column followed by the corresponding host name.
# The IP address and the host name should be separated by at least one
# space.
#
# Additionally, comments (such as these) may be inserted on individual
# lines or following the machine name denoted by a '#' symbol.
#
# For example:
#
#      102.54.94.97     rhino.acme.com          # source server
#       38.25.63.10     x.acme.com              # x client host

# localhost name resolution is handled within DNS itself.
#	127.0.0.1       localhost
#	::1             localhost

# Entry for the local Windows machine (presumably unrelated to the HBase cluster - confirm).
192.168.204.1 windows10.microdone.cn
# hm / hs01 / hs02: not referenced by the hbase-site.xml shown with this tutorial;
# presumably a separate or earlier cluster - confirm before removing.
192.168.153.112 hm
192.168.153.113 hs01
192.168.153.114 hs02

# hm02 / hs0201 / hs0202: the hosts listed in hbase.zookeeper.quorum in hbase-site.xml.
192.168.153.115 hm02
192.168.153.116 hs0201
192.168.153.117 hs0202

 

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!