Basic File Operations on HDFS Using the Java API

This post is mostly code. It shows how to upload, download, and delete files, create directories, and list directory contents in HDFS from Java. The environment is macOS with IntelliJ IDEA, and Maven manages the dependencies; the code and pom.xml are included below. The Hadoop version is the 2.6 CDH release (2.6.0-cdh5.7.0). In the code, hmaster points to the IP address of my virtual machine; change it to your own virtual machine's IP address or hostname.
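
The client locates the NameNode through the fs.defaultFS setting. The code below sets it programmatically, but it can equally come from a core-site.xml placed on the classpath, which new Configuration() picks up automatically. A minimal sketch, assuming the same hmaster host and port 9000:

<?xml version="1.0" encoding="UTF-8"?>
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hmaster:9000/</value>
    </property>
</configuration>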

HDFSUtil.java

package com.rain.hdfs;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.Before;
import org.junit.Test;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;

public class HDFSUtil {

    FileSystem fs = null;



//    Runs before each test method: set up the configuration and the FileSystem client
    @Before
    public void init() throws Exception{

        // new Configuration() reads any xxx-site.xml files found on the classpath
        // (e.g. a core-site.xml copied into the project resources) and loads them into conf
        Configuration conf = new Configuration();

        // Settings made in code override the values read from the configuration files
        conf.set("fs.defaultFS", "hdfs://hmaster:9000/");

        // Get a client instance for the file system described by the configuration, acting as user "hadoop"
        fs = FileSystem.get(new URI("hdfs://hmaster:9000/"),conf,"hadoop");

    }



    /**
     * Upload a file using the lower-level stream API.
     *
     * @throws Exception
     */
    @Test
    public void upload_old() throws Exception {

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hmaster:9000/");

        FileSystem fs = FileSystem.get(conf);

        // Destination path in HDFS
        Path dst = new Path("hdfs://hmaster:9000/data/sample.txt");

        // Output stream to HDFS and input stream from the local file
        FSDataOutputStream os = fs.create(dst);
        FileInputStream is = new FileInputStream("/Users/rain/Downloads/sample.txt");

        // Copy the bytes, then close both streams so the write is flushed to HDFS
        IOUtils.copy(is, os);
        is.close();
        os.close();


    }

    /**
     * Upload a file using the higher-level convenience API.
     * @throws Exception
     */
    @Test
    public void upload() throws Exception {

        // First argument: local source path; second: HDFS destination path and new file name
        fs.copyFromLocalFile(new Path("/Users/rain/Downloads/sample.txt"), new Path("hdfs://hmaster:9000/data/test.txt"));

    }


    /**
     * Download a file from HDFS to the local file system.
     * @throws Exception
     */
    @Test
    public void download() throws Exception {
        // First argument: path of the file in HDFS; second: local destination path and name.
        // If this fails on a machine without the native Hadoop libraries, the overload
        // copyToLocalFile(false, src, dst, true) writes through the raw local file system instead.
        fs.copyToLocalFile(new Path("hdfs://hmaster:9000/sample.txt"), new Path("/Users/rain/Downloads/sample.txt"));

    }

    /**
     * List file information under a directory.
     * @throws IOException
     * @throws IllegalArgumentException
     * @throws FileNotFoundException
     */
    @Test
    public void listFiles() throws FileNotFoundException, IllegalArgumentException, IOException {

        // listFiles returns files only and can traverse recursively (second argument)
        RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("/data"), true);

        while(files.hasNext()){

            LocatedFileStatus file = files.next();
            Path filePath = file.getPath();
            String fileName = filePath.getName();
            System.out.println(fileName);

        }

        System.out.println("---------------------------------");

        // listStatus returns both files and directories, but does not recurse by itself
        FileStatus[] listStatus = fs.listStatus(new Path("/data"));
        for(FileStatus status: listStatus){

            String name = status.getPath().getName();
            System.out.println(name + (status.isDirectory()?" is dir":" is file"));

        }

    }

    /**
     * Create a directory (parent directories are created as needed).
     * @throws Exception
     * @throws IllegalArgumentException
     */
    @Test
    public void mkdir() throws IllegalArgumentException, Exception {

        fs.mkdirs(new Path("/data/test"));


    }

    /**
     * Delete a file or directory; the second argument enables recursive deletion.
     * @throws IOException
     * @throws IllegalArgumentException
     */
    @Test
    public void rm() throws IllegalArgumentException, IOException {

        fs.delete(new Path("/data/test"), true);

    }


    public static void main(String[] args) throws Exception {

        // Not used here; the operations above are intended to be run as JUnit tests

    }



}
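
The class above is driven by JUnit, but the same calls work from an ordinary main method. Below is a minimal, hypothetical sketch (the class name and local path are assumptions matching the setup above) that creates a directory, uploads a local file, and lists the result:

package com.rain.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

public class HDFSQuickStart {

    public static void main(String[] args) throws Exception {

        // Same setup as init(): point the client at the NameNode and act as user "hadoop"
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hmaster:9000/");
        FileSystem fs = FileSystem.get(new URI("hdfs://hmaster:9000/"), conf, "hadoop");

        // Create a directory, upload a local file into it, then list the directory
        fs.mkdirs(new Path("/data"));
        fs.copyFromLocalFile(new Path("/Users/rain/Downloads/sample.txt"), new Path("/data/sample.txt"));

        for (FileStatus status : fs.listStatus(new Path("/data"))) {
            System.out.println(status.getPath().getName() + (status.isDirectory() ? " is dir" : " is file"));
        }

        fs.close();
    }
}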

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.rain.hadoop</groupId>
    <artifactId>rain-hadoop</artifactId>
    <version>1.0-SNAPSHOT</version>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>6</source>
                    <target>6</target>
                </configuration>
            </plugin>
        </plugins>
    </build>


    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <hadoop.version>2.6.0-cdh5.7.0</hadoop.version>
    </properties>

    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
        </repository>
    </repositories>

    <dependencies>
        <!-- Dependencies -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.6.0-cdh5.7.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.2.0-cdh5.7.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.0-cdh5.7.0</version>
        </dependency>

        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

</project>