一、实验目的

(1)理解HDFS在Hadoop体系结构中的角色;

(2)熟练使用HDFS操作常用的Shell命令;

(3)熟悉HDFS操作常用的Java API。

二、实验平台

  • 操作系统:Linux(建议CentOS);
  • Hadoop版本:3.2.2;
  • HBase版本:2.3.6;
  • JDK版本:1.7或以上版本;
  • Java IDE:IDEA

三、实验步骤

(1)编程实现以下指定功能,并用HBase提供的Shell命令完成相同任务:

1 列出HBase所有的表的相关信息,例如表名;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import java.io.IOException;
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /**
     * Initializes the shared HBase configuration, connection and Admin handle.
     * Note: Hadoop Configuration keys are case-sensitive — the property must be
     * "hbase.rootdir" (the original "Hbase.rootdir" was silently ignored).
     */
    public static void init(){
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir","hdfs://localhost:9000/hbase");
        try{
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /** Releases the Admin handle and the Connection (the original leaked the connection). */
    public static void close(){
        try {
            if(admin != null) {
                admin.close();
            }
            if(connection != null) {
                connection.close();
            }
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /**
     * Lists every table known to HBase and prints one table name per line.
     *
     * @throws IOException if the cluster cannot be reached
     */
    public static void getData() throws IOException{
        TableName[] tableNames = admin.listTableNames();
        for (TableName name : tableNames){
            System.out.println(name.getNameAsString());
        }
    }

    public static void main(String[] args) throws IOException{
        init();
        getData();
        close();
    }
}

shell 命令
在这里插入图片描述

2 在终端打印出指定的表的所有记录数据;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /**
     * Initializes the shared HBase configuration, connection and Admin handle.
     * Configuration keys are case-sensitive: the property must be "hbase.rootdir"
     * (the original "Hbase.rootdir" was silently ignored).
     */
    public static void init(){
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir","hdfs://localhost:9000/hbase");
        try{
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /** Releases the Admin handle and the Connection (the original leaked the connection). */
    public static void close(){
        try {
            if(admin != null) {
                admin.close();
            }
            if(connection != null) {
                connection.close();
            }
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /**
     * Prints every cell of every row in the given table: row key, column family,
     * qualifier, value and timestamp.
     *
     * @param tableName name of the table to dump
     * @throws IOException if the table cannot be scanned
     */
    public static void printTableData(String tableName) throws IOException{
        Table table = connection.getTable(TableName.valueOf(tableName));
        try (ResultScanner resultScanner = table.getScanner(new Scan())) {
            // (the original called scan.getAllowPartialResults(), a no-op getter)
            for (Result result : resultScanner) {
                for (Cell cell : result.rawCells()) {
                    System.out.print("行键:" + Bytes.toString(CellUtil.cloneRow(cell)));
                    System.out.print("\t列族" + Bytes.toString(CellUtil.cloneFamily(cell)));
                    System.out.print("\t列:" + Bytes.toString(CellUtil.cloneQualifier(cell)));
                    System.out.println("\t值:" + Bytes.toString(CellUtil.cloneValue(cell)));
                    System.out.println("\t时间戳:" + cell.getTimestamp());
                }
            }
        } finally {
            table.close();
        }
    }

    public static void main(String[] args) throws IOException{
        init();
        printTableData("student");
        close();
    }
}

shell 命令
在这里插入图片描述

3 向已经创建好的表添加和删除指定的列族或列;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /**
     * Initializes the shared HBase configuration, connection and Admin handle.
     * Configuration keys are case-sensitive: the property must be "hbase.rootdir"
     * (the original "Hbase.rootdir" was silently ignored).
     */
    public static void init(){
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir","hdfs://localhost:9000/hbase");
        try{
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /** Releases the Admin handle and the Connection (the original leaked the connection). */
    public static void close(){
        try {
            if(admin != null) {
                admin.close();
            }
            if(connection != null) {
                connection.close();
            }
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /**
     * Adds (or overwrites) one cell: writes {@code value} into
     * {@code columnFamily:column} of the given row.
     *
     * @throws IOException if the put fails
     */
    public static void modifyTable(String tableName, String RowKey, String columnFamily, String column, String value) throws IOException{
        Table table = connection.getTable(TableName.valueOf(tableName));
        try {
            Put put = new Put(Bytes.toBytes(RowKey));
            put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
            table.put(put);
        } finally {
            table.close();
        }
    }

    /**
     * Deletes the cell {@code columnFamily:column} from the given row.
     * To delete the whole column family instead, use
     * {@code delete.addFamily(Bytes.toBytes(columnFamily))}.
     *
     * @throws IOException if the delete fails
     */
    public static void modifyTable(String tableName, String RowKey, String columnFamily, String column) throws IOException{
        Table table = connection.getTable(TableName.valueOf(tableName));
        try {
            Delete delete = new Delete(Bytes.toBytes(RowKey));
            delete.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column));
            table.delete(delete);
        } finally {
            table.close();
        }
    }

    public static void main(String[] args) throws IOException{
        init();
        modifyTable("student","zhangsan","score","English","80"); // add the specified column
        modifyTable("student","zhangsan","score","English");      // delete the specified column
        close();
    }
}

shell 命令
在这里插入图片描述

4 清空指定的表的所有记录数据;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import java.io.IOException;
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /**
     * Initializes the shared HBase configuration, connection and Admin handle.
     * Configuration keys are case-sensitive: the property must be "hbase.rootdir"
     * (the original "Hbase.rootdir" was silently ignored).
     */
    public static void init(){
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir","hdfs://localhost:9000/hbase");
        try{
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /** Releases the Admin handle and the Connection (the original leaked the connection). */
    public static void close(){
        try {
            if(admin != null) {
                admin.close();
            }
            if(connection != null) {
                connection.close();
            }
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /**
     * Removes every record from the table by deleting each row once.
     * (The original issued a redundant Delete per cell; one Delete per row
     * removes all of that row's cells in a single call.)
     *
     * @param tableName table to clear; the table itself is kept
     * @throws IOException if the scan or a delete fails
     */
    public static void empty(String tableName) throws IOException{
        Table table = connection.getTable(TableName.valueOf(tableName));
        try (ResultScanner resultScanner = table.getScanner(new Scan())) {
            for (Result result : resultScanner) {
                table.delete(new Delete(result.getRow()));
            }
        } finally {
            table.close();
        }
        System.out.println("删除完毕");
    }

    public static void main(String[] args) throws IOException{
        init();
        empty("student");
        close();
    }
}

shell 命令
在这里插入图片描述

5 统计表的行数。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.client.coprocessor.*;
import java.io.IOException;
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /**
     * Initializes the shared HBase configuration, connection and Admin handle.
     * Configuration keys are case-sensitive: the property must be "hbase.rootdir"
     * (the original "Hbase.rootdir" was silently ignored).
     */
    public static void init(){
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir","hdfs://localhost:9000/hbase");
        try{
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /** Releases the Admin handle and the Connection (the original leaked the connection). */
    public static void close(){
        try {
            if(admin != null) {
                admin.close();
            }
            if(connection != null) {
                connection.close();
            }
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /**
     * Prints the number of rows in the table using the server-side aggregation
     * coprocessor. Requires the AggregateImplementation coprocessor to be
     * enabled in hbase-site.xml; prints "table not exist" if the table is absent.
     *
     * @param tableName table whose rows are counted
     */
    public static void rowCount(String tableName){
        try {
            TableName name = TableName.valueOf(tableName);
            if(admin.tableExists(name)) {
                Scan scan = new Scan();
                // AggregationClient is Closeable — the original leaked it.
                try (AggregationClient aggregationClient = new AggregationClient(configuration)) {
                    System.out.println("RowCount: " + aggregationClient.rowCount(name, new LongColumnInterpreter(), scan));
                }
            } else {
                System.out.println("table not exist");
            }
        }catch (Throwable e){
            e.printStackTrace();
        }
    }

    public static void main(String[] args) throws IOException{
        init();
        rowCount("player");
        close();
    }
}

shell 命令
在这里插入图片描述

(2)HBase数据库操作

现有以下关系型数据库中的表和数据,要求将其转换为适合于HBase存储的表并插入数据:

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

(1)createTable(String tableName, String[] fields)创建表,参数tableName为表的名称,字符串数组fields为存储记录各个字段名称的数组。要求当HBase已经存在名为tableName的表的时候,先删除原有的表,然后再创建新的表。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /**
     * Initializes the shared HBase configuration, connection and Admin handle.
     * Configuration keys are case-sensitive: the property must be "hbase.rootdir"
     * (the original "Hbase.rootdir" was silently ignored).
     */
    public static void init(){
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir","hdfs://localhost:9000/hbase");
        try{
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /** Releases the Admin handle and the Connection (the original leaked the connection). */
    public static void close(){
        try {
            if(admin != null) {
                admin.close();
            }
            if(connection != null) {
                connection.close();
            }
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /**
     * Creates a table with one column family per entry of {@code fields}.
     * If a table with the same name already exists it is disabled and deleted
     * first, as required by the assignment.
     *
     * @param myTableName name of the table to (re)create
     * @param fields      column-family names
     * @throws IOException if any admin operation fails
     */
    public static void createTable(String myTableName, String[] fields) throws IOException{
        TableName tableName = TableName.valueOf(myTableName);
        if(admin.tableExists(tableName)) {
            System.out.println("table exist");
            admin.disableTable(tableName);  // a table must be disabled before deletion
            admin.deleteTable(tableName);
        }
        TableDescriptorBuilder tableDescriptor = TableDescriptorBuilder.newBuilder(tableName);
        for(String str : fields){
            ColumnFamilyDescriptor family = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(str)).build();
            tableDescriptor.setColumnFamily(family);
        }
        admin.createTable(tableDescriptor.build());
        System.out.println("table created");
    }

    public static void main(String[] args) throws IOException{
        init();
        createTable("table2",new String[] {"score"});
        close();
    }
}
(2)addRecord(String tableName, String row, String[] fields, String[] values)向表tableName、行row(用S_Name表示)和字符串数组fields指定的单元格中添加对应的数据values。其中,fields中每个元素如果对应的列族下还有相应的列限定符的话,用“columnFamily:column”表示。例如,同时向“Math”、“Computer Science”、“English”三列添加成绩时,字符串数组fields为{“Score:Math”, ”Score:Computer Science”, ”Score:English”},数组values存储这三门课的成绩。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import java.io.IOException;
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /**
     * Initializes the shared HBase configuration, connection and Admin handle.
     * Configuration keys are case-sensitive: the property must be "hbase.rootdir"
     * (the original "Hbase.rootdir" was silently ignored).
     */
    public static void init(){
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir","hdfs://localhost:9000/hbase");
        try{
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /** Releases the Admin handle and the Connection (the original leaked the connection). */
    public static void close(){
        try {
            if(admin != null) {
                admin.close();
            }
            if(connection != null) {
                connection.close();
            }
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /**
     * Writes values[i] into the cell named by fields[i] of the given row.
     * Each field is either "family" or "family:qualifier"; a family-only field
     * (allowed by the assignment) is stored under the empty qualifier — the
     * original threw ArrayIndexOutOfBoundsException for that case.
     *
     * @throws IllegalArgumentException if fields and values differ in length
     * @throws IOException              if a put fails
     */
    public static void addRecord(String tableName, String row, String[] fields, String[] values) throws IOException{
        if (fields.length != values.length) {
            throw new IllegalArgumentException("fields and values must have the same length");
        }
        Table table = connection.getTable(TableName.valueOf(tableName));
        try {
            for (int i = 0; i < fields.length; i++) {
                Put put = new Put(row.getBytes());
                String[] cols = fields[i].split(":", 2);
                String qualifier = (cols.length > 1) ? cols[1] : "";
                put.addColumn(cols[0].getBytes(), qualifier.getBytes(), values[i].getBytes());
                table.put(put);
            }
        } finally {
            table.close();  // the original never closed the table
        }
    }

    public static void main(String[] args) throws IOException{
        init();
        addRecord("Student","zhansan",new String[]{"Score:Math"},new String[]{"98"});
        close();
    }
}
(3)scanColumn(String tableName, String column)浏览表tableName某一列的数据,如果某一行记录中该列数据不存在,则返回null。要求当参数column为某一列族名称时,如果底下有若干个列限定符,则要列出每个列限定符代表的列的数据;当参数column为某一列具体名称(例如“Score:Math”)时,只需要列出该列的数据。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /**
     * Initializes the shared HBase configuration, connection and Admin handle.
     * Configuration keys are case-sensitive: the property must be "hbase.rootdir"
     * (the original "Hbase.rootdir" was silently ignored).
     */
    public static void init(){
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir","hdfs://localhost:9000/hbase");
        try{
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /** Releases the Admin handle and the Connection (the original leaked the connection). */
    public static void close(){
        try {
            if(admin != null) {
                admin.close();
            }
            if(connection != null) {
                connection.close();
            }
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /**
     * Prints the data of one column of the table. As the assignment requires,
     * {@code column} is either a column-family name (all qualifiers under it
     * are listed) or a concrete "family:qualifier" (only that column is listed).
     * The original scanned the whole table without restricting the column at all.
     *
     * @throws IOException if the scan fails
     */
    public static void scanColumn(String tableName, String column) throws IOException{
        Table table = connection.getTable(TableName.valueOf(tableName));
        Scan scan = new Scan();
        if (column.contains(":")) {
            String[] parts = column.split(":", 2);
            scan.addColumn(Bytes.toBytes(parts[0]), Bytes.toBytes(parts[1]));
        } else {
            scan.addFamily(Bytes.toBytes(column));
        }
        try (ResultScanner resultScanner = table.getScanner(scan)) {
            for (Result result : resultScanner) {
                for (Cell cell : result.rawCells()) {
                    System.out.print("行键:" + Bytes.toString(CellUtil.cloneRow(cell)));
                    System.out.print("\t列族" + Bytes.toString(CellUtil.cloneFamily(cell)));
                    System.out.print("\t列:" + Bytes.toString(CellUtil.cloneQualifier(cell)));
                    System.out.println("\t值:" + Bytes.toString(CellUtil.cloneValue(cell)));
                    System.out.println("\t时间戳:" + cell.getTimestamp());
                }
            }
        } finally {
            table.close();
        }
    }

    public static void main(String[] args) throws IOException{
        init();
        scanColumn("Student","S_Name");
        close();
    }
}
(4)modifyData(String tableName, String row, String column)修改表tableName,行row(可以用学生姓名S_Name表示),列column指定的单元格的数据。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import java.io.IOException;
import java.util.List;
import java.util.Scanner;
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;
    // Timestamp of the cell being overwritten (kept as a field for compatibility).
    public static long ts;

    /**
     * Initializes the shared HBase configuration, connection and Admin handle.
     * Configuration keys are case-sensitive: the property must be "hbase.rootdir"
     * (the original "Hbase.rootdir" was silently ignored).
     */
    public static void init(){
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir","hdfs://localhost:9000/hbase");
        try{
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /** Releases the Admin handle and the Connection (the original leaked the connection). */
    public static void close(){
        try {
            if(admin != null) {
                admin.close();
            }
            if(connection != null) {
                connection.close();
            }
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /**
     * Overwrites the cell at (row, columnFamily:column) with a value read from
     * standard input. If the cell already exists, the put reuses its timestamp
     * so the new value replaces the old version instead of adding another one.
     * Fixes in this version: the original scanned the whole table instead of
     * fetching the single row, passed the row key where getColumnCells expects
     * a column family, and wrote at timestamp 0 when no previous cell existed.
     *
     * @throws IOException if the get or put fails
     */
    public static void modifyData(String tableName, String row, String columnFamily, String column) throws IOException{
        Scanner scanner = new Scanner(System.in);
        String value = scanner.next();
        Table table = connection.getTable(TableName.valueOf(tableName));
        try {
            Result result = table.get(new Get(row.getBytes()));
            List<Cell> cells = result.getColumnCells(columnFamily.getBytes(), column.getBytes());
            Put put = new Put(row.getBytes());
            if (cells.isEmpty()) {
                // No previous version: let HBase assign the current timestamp.
                put.addColumn(columnFamily.getBytes(), column.getBytes(), value.getBytes());
            } else {
                ts = cells.get(0).getTimestamp(); // cells are returned newest-first
                put.addColumn(columnFamily.getBytes(), column.getBytes(), ts, value.getBytes());
            }
            table.put(put);
        } finally {
            table.close();
        }
    }

    public static void main(String[] args) throws IOException{
        init();
        // The original passed only three arguments, which did not compile;
        // S_Name is a family-only column in this schema, so the qualifier is empty.
        modifyData("Student","row_1","S_Name","");
        close();
    }
}
(5)deleteRow(String tableName, String row)删除表tableName中row指定的行的记录。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
public class test2 {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    /**
     * Initializes the shared HBase configuration, connection and Admin handle.
     * Configuration keys are case-sensitive: the property must be "hbase.rootdir"
     * (the original "Hbase.rootdir" was silently ignored).
     */
    public static void init(){
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir","hdfs://localhost:9000/hbase");
        try{
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /** Releases the Admin handle and the Connection (the original leaked the connection). */
    public static void close(){
        try {
            if(admin != null) {
                admin.close();
            }
            if(connection != null) {
                connection.close();
            }
        }catch (IOException e){
            e.printStackTrace();
        }
    }

    /**
     * Deletes the entire row identified by {@code row} from the table.
     *
     * @throws IOException if the delete fails
     */
    public static void deleteRow(String tableName, String row) throws IOException{
        Table table = connection.getTable(TableName.valueOf(tableName));
        try {
            table.delete(new Delete(Bytes.toBytes(row)));
        } finally {
            table.close();
        }
    }

    public static void main(String[] args) throws IOException{
        init();
        // NOTE(review): "score" looks like a column-family name, not a row key —
        // confirm the intended row against the table's data.
        deleteRow("student", "score");
        close();
    }
}

四、实验总结及问题

1、本实验学会了使用什么工具完成什么任务?

学会了使用HBase Shell的基本命令,以及使用HBase Java API完成建表、增删改查等基本操作。

2、在实验过程中遇到了什么问题?是如何解决的?

许多API方法尚未掌握,通过查阅官方文档、与同学交流以及查看技术博客加以解决。

3、还有什么问题尚未解决?可能是什么原因导致的。

有部分功能并未完全实现,需要继续调试

Logo

华为开发者空间,是为全球开发者打造的专属开发空间,汇聚了华为优质开发资源及工具,致力于让每一位开发者拥有一台云主机,基于华为根生态开发、创新。

更多推荐