Java Code for Hadoop HDFS File Operations

Reposted from: http://www.open-open.com/lib/view/open1373589519909.html

1. Creating a directory

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
 
public class MakeDir {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/data/20130709");
        fs.mkdirs(path);  // mkdirs() creates the directory together with any missing parents
        fs.close();
    }
}
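
mkdirs() also creates any missing parent directories, and it simply returns true if the directory is already there. To act only when the directory is new, an exists() check can be added first; a minimal variation of the example above:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeDirIfAbsent {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/data/20130709");
        // Only create the directory when it does not exist yet.
        if (!fs.exists(path)) {
            fs.mkdirs(path);
        }
        fs.close();
    }
}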

2. Deleting a directory

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
 
public class DeleteDir {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
         
        Path path = new Path("/user/hadoop/data/20130710");
        fs.delete(path, true);  // true: delete recursively, including anything inside the directory
        fs.close();
    }
}
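
Note that delete() returns a boolean: it is false when the path did not exist, so a separate exists() check before deleting is unnecessary.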

3. Writing a file

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
 
public class WriteFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/data/write.txt");
        FSDataOutputStream out = fs.create(path);
        out.writeUTF("da jia hao,cai shi zhen de hao!");
        out.close();  // close the stream so the data is flushed to HDFS
        fs.close();
    }
}
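
Note that writeUTF() stores the string in Java's modified UTF-8 format, preceded by a two-byte length field, so the resulting file is not quite plain text. To get a plain-text file (for example, so that hadoop fs -cat shows exactly the string), the raw bytes can be written instead; a minimal sketch, assuming Java 7+ for StandardCharsets:

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WritePlainFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/data/write.txt");
        FSDataOutputStream out = fs.create(path);
        // write(byte[]) stores the raw bytes with no length prefix.
        out.write("da jia hao,cai shi zhen de hao!".getBytes(StandardCharsets.UTF_8));
        out.close();
        fs.close();
    }
}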

4. Reading a file

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
 
public class ReadFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/data/write.txt");
         
        if (fs.exists(path)) {
            FSDataInputStream is = fs.open(path);
            FileStatus status = fs.getFileStatus(path);
            // Buffer the whole file in memory; fine for small files.
            byte[] buffer = new byte[(int) status.getLen()];
            is.readFully(0, buffer);
            is.close();
            fs.close();
            // Decode the bytes before printing (buffer.toString() would only print the array reference).
            // A file written with writeUTF(), as above, carries a two-byte length prefix;
            // such a file can also be read back symmetrically with readUTF().
            System.out.println(new String(buffer, "UTF-8"));
        }
    }
}
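
For large files, buffering the whole content in one byte array is wasteful. The Hadoop utility class org.apache.hadoop.io.IOUtils can stream the file in small chunks instead; a sketch of the same read using copyBytes:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class CatFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/data/write.txt");
        FSDataInputStream in = fs.open(path);
        try {
            // Copy the file to stdout in 4 KB chunks; only one buffer is held in memory.
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
        fs.close();
    }
}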

5. Uploading a local file to HDFS

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
 
public class CopyFromLocalFile {
 
    public static void main(String[] args) throws IOException {
         
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path src = new Path("/home/hadoop/word.txt");
        Path dst = new Path("/user/hadoop/data/");
        fs.copyFromLocalFile(src, dst);
        fs.close();
    }
}
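
With this two-argument form the local file is kept and an existing HDFS file is silently overwritten. Both behaviours can be controlled explicitly via the four-argument overload copyFromLocalFile(delSrc, overwrite, src, dst); a sketch that refuses to replace an existing file:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalNoOverwrite {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path src = new Path("/home/hadoop/word.txt");
        Path dst = new Path("/user/hadoop/data/");
        // delSrc = false: keep the local file;
        // overwrite = false: throw an IOException rather than replace an existing file.
        fs.copyFromLocalFile(false, false, src, dst);
        fs.close();
    }
}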

6. Deleting a file

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
 
public class DeleteFile {
 
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
         
        Path path = new Path("/user/hadoop/data/word.txt");
        fs.delete(path, false);  // false: no recursion needed, the path is a single file
        fs.close();
    }
}
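
A delete through fs.delete() is immediate and unrecoverable. If the cluster has the trash feature enabled (fs.trash.interval > 0), the file can be moved to the current user's .Trash directory instead; a sketch, assuming a Hadoop release whose org.apache.hadoop.fs.Trash offers the Trash(FileSystem, Configuration) constructor and moveToTrash(Path):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Trash;

public class DeleteToTrash {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/data/word.txt");
        // moveToTrash returns false when trash is disabled or the move failed;
        // fall back to a plain delete in that case.
        Trash trash = new Trash(fs, conf);
        if (!trash.moveToTrash(path)) {
            fs.delete(path, false);
        }
        fs.close();
    }
}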

7. Listing all subdirectories and files under a given directory

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetAllChildFile {

    static Configuration conf = new Configuration();

    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop");
        getFile(path, fs);
        fs.close();
    }

    // Recursively walk the tree under path, printing the full path of every file.
    public static void getFile(Path path, FileSystem fs) throws IOException {
        FileStatus[] fileStatus = fs.listStatus(path);
        for (int i = 0; i < fileStatus.length; i++) {
            if (fileStatus[i].isDir()) {
                getFile(fileStatus[i].getPath(), fs);  // descend into the subdirectory
            } else {
                System.out.println(fileStatus[i].getPath().toString());
            }
        }
    }
}
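
Newer Hadoop releases also offer FileSystem.listFiles(path, true), which does the recursion internally and returns a RemoteIterator over every file below the path; a sketch, assuming a release (0.21/2.x or later) where this method is available:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListAllFiles {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // listFiles(path, true) walks the tree and yields files only; no manual recursion needed.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/user/hadoop"), true);
        while (it.hasNext()) {
            System.out.println(it.next().getPath());
        }
        fs.close();
    }
}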

8. Finding where a file is stored in the HDFS cluster

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FindFile {

    public static void main(String[] args) throws IOException {
        getFileLocal();
    }

    /**
     * Find where the blocks of a file are stored in the HDFS cluster.
     */
    public static void getFileLocal() throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/data/write.txt");

        FileStatus status = fs.getFileStatus(path);
        // One BlockLocation per block of the file within the given byte range.
        BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());

        for (int i = 0; i < locations.length; i++) {
            // Each block is replicated on several DataNodes; print every replica's host.
            String[] hosts = locations[i].getHosts();
            for (String host : hosts) {
                System.out.println("block_" + i + "_location:" + host);
            }
        }
        fs.close();
    }
}
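
The second and third arguments of getFileBlockLocations() are a byte offset and a length, so the query can also be restricted to part of a file; for example, getFileBlockLocations(status, 0, 1) returns the location of the first block only.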

9. Listing the names of all nodes in the HDFS cluster

package com.hadoop.file;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class FindFile {

    public static void main(String[] args) throws IOException {
        getHDFSNode();
    }

    /**
     * List the host names of all DataNodes in the HDFS cluster.
     */
    public static void getHDFSNode() throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // The cast succeeds only when fs.defaultFS points at a real HDFS cluster;
        // on the local filesystem it would throw a ClassCastException.
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        DatanodeInfo[] dataNodeStats = dfs.getDataNodeStats();

        for (int i = 0; i < dataNodeStats.length; i++) {
            System.out.println("DataNode_" + i + "_Node:" + dataNodeStats[i].getHostName());
        }
        fs.close();
    }
}
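
The same report is available from the command line with hdfs dfsadmin -report.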

Author: Qiu Qingyu
Copyright notice: Unless otherwise stated, all articles on this blog are licensed under CC BY-NC-SA 3.0 CN. Please credit the source when reposting!
Permalink: http://qiuqingyu.cn/2016/01/05/Hadoop-HDFS文件操作的Java代码/