实验3:熟悉常用的HBase操作
一、实验目的(1)理解HBase在Hadoop体系结构中的角色;(2)熟练使用HBase操作常用的Shell命令;(3)熟悉HBase操作常用的Java API。二、实验平台操作系统:Linux(建议CentOS);Hadoop版本:3.2.2;HBase版本:2.3.6;JDK版本:1.7或以上版本;Java IDE:IDEA三、实验步骤(1)编程实现以下指定功能,并用Hadoop提供的HBase Shell命令完成相同任务:
·
一、实验目的
(1)理解HBase在Hadoop体系结构中的角色;
(2)熟练使用HBase操作常用的Shell命令;
(3)熟悉HBase操作常用的Java API。
二、实验平台
- 操作系统:Linux(建议CentOS);
- Hadoop版本:3.2.2;
- HBase版本:2.3.6;
- JDK版本:1.7或以上版本;
- Java IDE:IDEA
三、实验步骤
(1)编程实现以下指定功能,并用Hadoop提供的HBase Shell命令完成相同任务:
1 列出HBase所有的表的相关信息,例如表名;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import java.io.IOException;
/**
 * Task 1: list the names of all tables currently stored in HBase.
 */
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /** Creates the HBase configuration, the shared Connection and the Admin client. */
    public static void init() {
        configuration = HBaseConfiguration.create();
        // Hadoop/HBase property keys are case-sensitive: the original
        // "Hbase.rootdir" is silently ignored; "hbase.rootdir" is correct.
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes the Admin client and the Connection (the original leaked the Connection). */
    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the name of every table known to the cluster, one per line.
     *
     * @throws IOException if the Admin RPC fails
     */
    public static void getData() throws IOException {
        for (TableName name : admin.listTableNames()) {
            System.out.println(name.getNameAsString());
        }
    }

    public static void main(String[] args) throws IOException {
        init();
        getData();
        close();
    }
}
shell 命令:list
2 在终端打印出指定的表的所有记录数据;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Task 2: print every record (row key, column family, qualifier, value,
 * timestamp) of a given table to the terminal.
 */
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /** Creates the HBase configuration, the shared Connection and the Admin client. */
    public static void init() {
        configuration = HBaseConfiguration.create();
        // Property keys are case-sensitive: "hbase.rootdir", not "Hbase.rootdir".
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes the Admin client and the Connection (the original leaked the Connection). */
    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Scans {@code tableName} and prints every cell of every row.
     * The original called {@code scan.getAllowPartialResults()} — a getter with
     * no side effect — which has been removed.
     *
     * @param tableName name of the table to dump
     * @throws IOException if the table cannot be opened or scanned
     */
    public static void printTableData(String tableName) throws IOException {
        Table table = connection.getTable(TableName.valueOf(tableName));
        // try-with-resources closes the scanner, which the original leaked.
        try (ResultScanner resultScanner = table.getScanner(new Scan())) {
            for (Result result : resultScanner) {
                for (Cell cell : result.rawCells()) {
                    System.out.print("行键:" + Bytes.toString(CellUtil.cloneRow(cell)));
                    System.out.print("\t列族" + Bytes.toString(CellUtil.cloneFamily(cell)));
                    System.out.print("\t列:" + Bytes.toString(CellUtil.cloneQualifier(cell)));
                    System.out.println("\t值:" + Bytes.toString(CellUtil.cloneValue(cell)));
                    System.out.println("\t时间戳:" + cell.getTimestamp());
                }
            }
        }
        table.close();
    }

    public static void main(String[] args) throws IOException {
        init();
        printTableData("student");
        close();
    }
}
shell 命令:scan 'student'
3 向已经创建好的表添加和删除指定的列族或列;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Task 3: add a value to, or delete, a specified column (or column family)
 * of an existing table. The two operations are the two overloads of
 * {@code modifyTable} below.
 */
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /** Creates the HBase configuration, the shared Connection and the Admin client. */
    public static void init() {
        configuration = HBaseConfiguration.create();
        // Property keys are case-sensitive: "hbase.rootdir", not "Hbase.rootdir".
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes the Admin client and the Connection (the original leaked the Connection). */
    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Adds (or overwrites) the cell {@code columnFamily:column} of row
     * {@code RowKey} with {@code value}.
     *
     * @throws IOException if the put fails
     */
    public static void modifyTable(String tableName, String RowKey, String columnFamily,
                                   String column, String value) throws IOException {
        Table table = connection.getTable(TableName.valueOf(tableName));
        Put put = new Put(Bytes.toBytes(RowKey));
        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
        table.put(put);
        table.close();
    }

    /**
     * Deletes the cell {@code columnFamily:column} of row {@code RowKey}.
     * To delete a whole column family instead, use
     * {@code delete.addFamily(Bytes.toBytes(columnFamily))}.
     *
     * @throws IOException if the delete fails
     */
    public static void modifyTable(String tableName, String RowKey, String columnFamily,
                                   String column) throws IOException {
        Table table = connection.getTable(TableName.valueOf(tableName));
        Delete delete = new Delete(Bytes.toBytes(RowKey));
        delete.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column));
        table.delete(delete);
        table.close();
    }

    public static void main(String[] args) throws IOException {
        init();
        modifyTable("student", "zhangsan", "score", "English", "80"); // add the column
        modifyTable("student", "zhangsan", "score", "English");       // delete the column
        close();
    }
}
shell 命令:添加:put 'student','zhangsan','score:English','80';删除:delete 'student','zhangsan','score:English'
4 清空指定的表的所有记录数据;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import java.io.IOException;
/**
 * Task 4: delete every record of a given table (the table itself is kept).
 */
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /** Creates the HBase configuration, the shared Connection and the Admin client. */
    public static void init() {
        configuration = HBaseConfiguration.create();
        // Property keys are case-sensitive: "hbase.rootdir", not "Hbase.rootdir".
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes the Admin client and the Connection (the original leaked the Connection). */
    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Removes all rows from {@code tableName}.
     * The original issued one Delete per *cell*, deleting the same row once per
     * cell it contained; one Delete per row is sufficient. The scanner and the
     * table handle are now closed as well.
     * (Equivalently: {@code admin.disableTable(...)} + {@code admin.truncateTable(...)}.)
     *
     * @param tableName table whose rows are removed
     * @throws IOException if scanning or deleting fails
     */
    public static void empty(String tableName) throws IOException {
        Table table = connection.getTable(TableName.valueOf(tableName));
        try (ResultScanner resultScanner = table.getScanner(new Scan())) {
            for (Result result : resultScanner) {
                table.delete(new Delete(result.getRow()));
            }
        }
        table.close();
        System.out.println("删除完毕");
    }

    public static void main(String[] args) throws IOException {
        init();
        empty("student");
        close();
    }
}
shell 命令:truncate 'student'
5 统计表的行数。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.client.coprocessor.*;
import java.io.IOException;
/**
 * Task 5: count the number of rows in a table using the aggregation
 * coprocessor (faster than a client-side scan for large tables).
 */
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /** Creates the HBase configuration, the shared Connection and the Admin client. */
    public static void init() {
        configuration = HBaseConfiguration.create();
        // Property keys are case-sensitive: "hbase.rootdir", not "Hbase.rootdir".
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes the Admin client and the Connection (the original leaked the Connection). */
    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the row count of {@code tableName}, or "table not exist".
     * Requires the AggregateImplementation coprocessor to be enabled
     * cluster-wide in hbase-site.xml.
     *
     * @param tableName table to count
     */
    public static void rowCount(String tableName) {
        try {
            TableName name = TableName.valueOf(tableName);
            if (admin.tableExists(name)) {
                Scan scan = new Scan();
                // try-with-resources: the original never closed the AggregationClient.
                try (AggregationClient aggregationClient = new AggregationClient(configuration)) {
                    System.out.println("RowCount: "
                            + aggregationClient.rowCount(name, new LongColumnInterpreter(), scan));
                }
            } else {
                System.out.println("table not exist");
            }
        } catch (Throwable e) {
            // AggregationClient.rowCount declares Throwable, so catch Throwable here.
            e.printStackTrace();
        }
    }

    public static void main(String[] args) throws IOException {
        init();
        rowCount("player");
        close();
    }
}
shell 命令:count 'player'
(2)HBase数据库操作
现有以下关系型数据库中的表和数据,要求将其转换为适合于HBase存储的表并插入数据:
(1)createTable(String tableName, String[] fields)创建表,参数tableName为表的名称,字符串数组fields为存储记录各个字段名称的数组。要求当HBase已经存在名为tableName的表的时候,先删除原有的表,然后再创建新的表。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Task (2)-(1): createTable(tableName, fields) — create a table with one
 * column family per element of {@code fields}; if a table with the same name
 * already exists it is dropped and recreated.
 */
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /** Creates the HBase configuration, the shared Connection and the Admin client. */
    public static void init() {
        configuration = HBaseConfiguration.create();
        // Property keys are case-sensitive: "hbase.rootdir", not "Hbase.rootdir".
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes the Admin client and the Connection (the original leaked the Connection). */
    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * (Re)creates table {@code myTableName} with the column families listed in
     * {@code fields}. An existing table of the same name is disabled and
     * deleted first, as the assignment requires.
     *
     * @param myTableName table name
     * @param fields      one column-family name per element
     * @throws IOException if any Admin operation fails
     */
    public static void createTable(String myTableName, String[] fields) throws IOException {
        TableName tableName = TableName.valueOf(myTableName);
        if (admin.tableExists(tableName)) {
            System.out.println("table exist");
            admin.disableTable(tableName); // a table must be disabled before deletion
            admin.deleteTable(tableName);
        }
        TableDescriptorBuilder tableDescriptor = TableDescriptorBuilder.newBuilder(tableName);
        for (String str : fields) {
            tableDescriptor.setColumnFamily(
                    ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(str)).build());
        }
        admin.createTable(tableDescriptor.build());
        System.out.println("table created");
    }

    public static void main(String[] args) throws IOException {
        init();
        createTable("table2", new String[] {"score"});
        close();
    }
}
(2)addRecord(String tableName, String row, String[] fields, String[] values)向表tableName、行row(用S_Name表示)和字符串数组fields指定的单元格中添加对应的数据values。其中,fields中每个元素如果对应的列族下还有相应的列限定符的话,用“columnFamily:column”表示。例如,同时向“Math”、“Computer Science”、“English”三列添加成绩时,字符串数组fields为{“Score:Math”, ”Score:Computer Science”, ”Score:English”},数组values存储这三门课的成绩。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import java.io.IOException;
/**
 * Task (2)-(2): addRecord(tableName, row, fields, values) — write
 * values[i] into the cell named by fields[i] ("family" or "family:qualifier")
 * of the given row.
 */
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /** Creates the HBase configuration, the shared Connection and the Admin client. */
    public static void init() {
        configuration = HBaseConfiguration.create();
        // Property keys are case-sensitive: "hbase.rootdir", not "Hbase.rootdir".
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes the Admin client and the Connection (the original leaked the Connection). */
    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes one cell per entry of {@code fields}/{@code values} into row
     * {@code row}. A field of the form "family:qualifier" targets that column;
     * a bare "family" targets the family with an empty qualifier (the original
     * threw ArrayIndexOutOfBoundsException in that case). All cells are batched
     * into a single Put, and the table handle is closed.
     *
     * @throws IOException if the put fails
     */
    public static void addRecord(String tableName, String row, String[] fields, String[] values)
            throws IOException {
        Table table = connection.getTable(TableName.valueOf(tableName));
        Put put = new Put(row.getBytes());
        for (int i = 0; i < fields.length; i++) {
            // limit 2 keeps any further ':' inside the qualifier.
            String[] cols = fields[i].split(":", 2);
            String qualifier = (cols.length > 1) ? cols[1] : "";
            put.addColumn(cols[0].getBytes(), qualifier.getBytes(), values[i].getBytes());
        }
        table.put(put);
        table.close();
    }

    public static void main(String[] args) throws IOException {
        init();
        addRecord("Student", "zhansan", new String[]{"Score:Math"}, new String[]{"98"});
        close();
    }
}
(3)scanColumn(String tableName, String column)浏览表tableName某一列的数据,如果某一行记录中该列数据不存在,则返回null。要求当参数column为某一列族名称时,如果底下有若干个列限定符,则要列出每个列限定符代表的列的数据;当参数column为某一列具体名称(例如“Score:Math”)时,只需要列出该列的数据。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Task (2)-(3): scanColumn(tableName, column) — print the data of one column
 * (or of every column under one column family) of a table.
 */
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /** Creates the HBase configuration, the shared Connection and the Admin client. */
    public static void init() {
        configuration = HBaseConfiguration.create();
        // Property keys are case-sensitive: "hbase.rootdir", not "Hbase.rootdir".
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes the Admin client and the Connection (the original leaked the Connection). */
    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the cells of the requested column. BUG FIX: the original ignored
     * {@code column} entirely and dumped the whole table. Now:
     * - "family"           -> scan.addFamily: every qualifier under the family;
     * - "family:qualifier" -> scan.addColumn: only that one column.
     *
     * @param tableName table to scan
     * @param column    column family, or "family:qualifier"
     * @throws IOException if the scan fails
     */
    public static void scanColumn(String tableName, String column) throws IOException {
        Table table = connection.getTable(TableName.valueOf(tableName));
        Scan scan = new Scan();
        String[] cols = column.split(":", 2);
        if (cols.length > 1) {
            scan.addColumn(Bytes.toBytes(cols[0]), Bytes.toBytes(cols[1]));
        } else {
            scan.addFamily(Bytes.toBytes(column));
        }
        try (ResultScanner resultScanner = table.getScanner(scan)) {
            for (Result result : resultScanner) {
                for (Cell cell : result.rawCells()) {
                    System.out.print("行键:" + Bytes.toString(CellUtil.cloneRow(cell)));
                    System.out.print("\t列族" + Bytes.toString(CellUtil.cloneFamily(cell)));
                    System.out.print("\t列:" + Bytes.toString(CellUtil.cloneQualifier(cell)));
                    System.out.println("\t值:" + Bytes.toString(CellUtil.cloneValue(cell)));
                    System.out.println("\t时间戳:" + cell.getTimestamp());
                }
            }
        }
        table.close();
    }

    public static void main(String[] args) throws IOException {
        init();
        scanColumn("Student", "S_Name");
        close();
    }
}
(4)modifyData(String tableName, String row, String column)修改表tableName,行row(可以用学生姓名S_Name表示),列column指定的单元格的数据。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import java.io.IOException;
import java.util.Scanner;
/**
 * Task (2)-(4): modifyData(tableName, row, columnFamily, column) — overwrite
 * the value of one cell, reading the new value from standard input.
 */
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;
    public static long ts; // timestamp of the latest existing cell version

    /** Creates the HBase configuration, the shared Connection and the Admin client. */
    public static void init() {
        configuration = HBaseConfiguration.create();
        // Property keys are case-sensitive: "hbase.rootdir", not "Hbase.rootdir".
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes the Admin client and the Connection (the original leaked the Connection). */
    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Overwrites cell {@code columnFamily:column} of row {@code row} with a
     * value read from stdin, reusing the existing cell's timestamp so the old
     * version is replaced rather than shadowed.
     * BUG FIX: the original passed the ROW key as the first argument of
     * {@code Result.getColumnCells(family, qualifier)}; it must be the family.
     *
     * @throws IOException if the scan or put fails
     */
    public static void modifyData(String tableName, String row, String columnFamily, String column)
            throws IOException {
        Scanner scanner = new Scanner(System.in);
        String value = scanner.next();
        Table table = connection.getTable(TableName.valueOf(tableName));
        Put put = new Put(row.getBytes());
        try (ResultScanner resultScanner = table.getScanner(new Scan())) {
            for (Result result : resultScanner) {
                // getColumnCells expects (family, qualifier).
                for (Cell cell : result.getColumnCells(columnFamily.getBytes(), column.getBytes())) {
                    ts = cell.getTimestamp();
                }
            }
        }
        put.addColumn(columnFamily.getBytes(), column.getBytes(), ts, value.getBytes());
        table.put(put);
        table.close();
    }

    public static void main(String[] args) throws IOException {
        init();
        // BUG FIX: the original called modifyData with only three arguments,
        // which did not compile; the method needs family AND qualifier.
        modifyData("Student", "row_1", "S_Name", "firstName");
        close();
    }
}
(5)deleteRow(String tableName, String row)删除表tableName中row指定的行的记录。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Task (2)-(5): deleteRow(tableName, row) — delete one entire row of a table.
 */
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /** Creates the HBase configuration, the shared Connection and the Admin client. */
    public static void init() {
        configuration = HBaseConfiguration.create();
        // Property keys are case-sensitive: "hbase.rootdir", not "Hbase.rootdir".
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes the Admin client and the Connection (the original leaked the Connection). */
    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes every cell of row {@code row} in table {@code tableName}.
     *
     * @throws IOException if the delete fails
     */
    public static void deleteRow(String tableName, String row) throws IOException {
        Table table = connection.getTable(TableName.valueOf(tableName));
        table.delete(new Delete(Bytes.toBytes(row)));
        table.close();
    }

    public static void main(String[] args) throws IOException {
        init();
        deleteRow("student", "score");
        close();
    }
}
四、实验总结及问题
1、本次实验学会了使用哪些工具完成哪些任务?
学会使用hbase shell基本命令,使用hbase 基本api
2、在实验过程中遇到了什么问题?是如何解决的?
许多方法尚未掌握,靠查阅官方文档和与同学交流,或者查看博客
3、还有什么问题尚未解决?可能是什么原因导致的。
有部分功能并未完全实现,需要继续调试
更多推荐
已为社区贡献1条内容
所有评论(0)