[SequenceFile_2] Basic Operations on SequenceFile
Published: 2019-06-27



0. Overview

  Test reading and writing sequence files && test sorting sequence files && test merging sequence files && test sequence file compression types && test converting a log file into a sequence file

  This post exists as a supplement to the basic SequenceFile operations section of an earlier post in this series.

1. Testing Read/Write && Compression

package hadoop.sequencefile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.junit.Test;

/**
 * Tests for SequenceFile read/write
 */
public class TestSeqFile {

    /**
     * Test writing a SequenceFile
     */
    @Test
    public void testWriteSeq() throws Exception {
        Configuration conf = new Configuration();
        // Set the file system to local mode
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);

//        Path path = new Path("E:/test/none.seq");
//        Path path = new Path("E:/test/record.seq");
        Path path = new Path("E:/test/block.seq");

        // No compression
//        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, IntWritable.class, Text.class, SequenceFile.CompressionType.NONE);
        // Record compression
//        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, IntWritable.class, Text.class, SequenceFile.CompressionType.RECORD);
        // Block compression
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, IntWritable.class, Text.class, SequenceFile.CompressionType.BLOCK);

        for (int i = 1; i <= 1000; i++) {
            IntWritable key = new IntWritable(i);
            Text value = new Text("helloworld" + i);
            writer.append(key, value);
        }
        writer.close();
    }

    /**
     * Test reading a SequenceFile
     */
    @Test
    public void testReadSeq() throws Exception {
        Configuration conf = new Configuration();
        // Set the file system to local mode
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);

        Path path = new Path("E:/test/block.seq");
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);

        // Initialize the two Writable objects that next() fills in
        IntWritable key = new IntWritable();
        Text value = new Text();

        while (reader.next(key, value)) {
            long position = reader.getPosition();
            System.out.println("key: " + key.get() + ", val: " + value + ", pos: " + position);
        }
        reader.close();
    }
}
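
The createWriter(fs, conf, path, ...) overload used above is deprecated as of Hadoop 2.x. As a minimal sketch, assuming Hadoop 2.x or later, the same writer and reader can be built with the Option-based API (same key/value classes and local path as the test above; the class name OptionApiSketch is just for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class OptionApiSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        Path path = new Path("E:/test/block.seq");

        // Writer: file, key/value classes, and compression type are passed as options
        SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(path),
                SequenceFile.Writer.keyClass(IntWritable.class),
                SequenceFile.Writer.valueClass(Text.class),
                SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK));
        writer.append(new IntWritable(1), new Text("helloworld1"));
        writer.close();

        // Reader: the file is the only required option
        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
        IntWritable key = new IntWritable();
        Text value = new Text();
        while (reader.next(key, value)) {
            System.out.println(key.get() + " -> " + value);
        }
        reader.close();
    }
}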

2. Testing Sorting

package hadoop.sequencefile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.junit.Test;

import java.util.Random;

/**
 * Tests for SequenceFile sorting
 */
public class TestSeqFileSort {

    /**
     * Create a SequenceFile with keys in random order
     */
    @Test
    public void testWriteRandom() throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);

        Path p = new Path("E:/test/random.seq");
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, p, IntWritable.class, Text.class, SequenceFile.CompressionType.RECORD);

        // Initialize the random number generator
        Random r = new Random();
        for (int i = 1; i < 100000; i++) {
            // Pick a random value in [0, 100000)
            int j = r.nextInt(100000);
            IntWritable key = new IntWritable(j);
            Text value = new Text("helloworld" + j);
            writer.append(key, value);
        }
        writer.close();
    }

    /**
     * Test sorting a SequenceFile
     */
    @Test
    public void testSort() throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);

        Path pin = new Path("E:/test/random.seq");
        Path pout = new Path("E:/test/sort.seq");

        SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, IntWritable.class, Text.class, conf);
        sorter.sort(pin, pout);
    }

    /**
     * Test reading the sorted SequenceFile
     */
    @Test
    public void testReadSeq() throws Exception {
        Configuration conf = new Configuration();
        // Set the file system to local mode
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);

        Path path = new Path("E:/test/sort.seq");
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);

        // Initialize the two Writable objects that next() fills in
        IntWritable key = new IntWritable();
        Text value = new Text();

        while (reader.next(key, value)) {
            long position = reader.getPosition();
            System.out.println("key: " + key.get() + ", val: " + value + ", pos: " + position);
        }
        reader.close();
    }
}
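
sorter.sort(pin, pout) orders records by the key class's default comparator, so the IntWritable keys come out in ascending order. As a hedged sketch, assuming you wanted descending order instead, SequenceFile.Sorter also has a constructor that accepts a custom RawComparator (the output path sort_desc.seq and class name DescendingSortSketch are made up for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparator;

public class DescendingSortSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);

        // Invert the default IntWritable comparator to get descending order
        RawComparator<IntWritable> descending = new RawComparator<IntWritable>() {
            private final WritableComparator asc = WritableComparator.get(IntWritable.class);

            public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
                return -asc.compare(b1, s1, l1, b2, s2, l2);
            }

            public int compare(IntWritable a, IntWritable b) {
                return -a.compareTo(b);
            }
        };

        SequenceFile.Sorter sorter =
                new SequenceFile.Sorter(fs, descending, IntWritable.class, Text.class, conf);
        sorter.sort(new Path("E:/test/random.seq"), new Path("E:/test/sort_desc.seq"));
    }
}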


3. Testing Merging

package hadoop.sequencefile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.junit.Test;

/**
 * Tests for merging SequenceFiles; all inputs must use the same compression type
 */
public class TestSeqFileMerge {

    /**
     * Test writing SequenceFiles.
     * Run twice to create two files covering the key ranges 1-100 and 101-200.
     */
    @Test
    public void testWriteSeq() throws Exception {
        Configuration conf = new Configuration();
        // Set the file system to local mode
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);

//        Path path = new Path("E:/test/block1.seq");
        Path path = new Path("E:/test/block2.seq");

        // Block compression
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, IntWritable.class, Text.class, SequenceFile.CompressionType.BLOCK);

//        for (int i = 1; i <= 100; i++) {
        for (int i = 101; i <= 200; i++) {
            IntWritable key = new IntWritable(i);
            Text value = new Text("helloworld" + i);
            writer.append(key, value);
        }
        writer.close();
    }

    /**
     * Test merging the two files; merging keeps the output sorted
     */
    @Test
    public void testMerge() throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);

        Path pin1 = new Path("E:/test/block1.seq");
        Path pin2 = new Path("E:/test/block2.seq");
        Path pout = new Path("E:/test/merge.seq");

        SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, IntWritable.class, Text.class, conf);
        Path[] p = {pin1, pin2};
        sorter.merge(p, pout);
    }

    /**
     * Test reading the merged SequenceFile
     */
    @Test
    public void testReadSeq() throws Exception {
        Configuration conf = new Configuration();
        // Set the file system to local mode
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);

        Path path = new Path("E:/test/merge.seq");
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);

        // Initialize the two Writable objects that next() fills in
        IntWritable key = new IntWritable();
        Text value = new Text();

        while (reader.next(key, value)) {
            long position = reader.getPosition();
            System.out.println("key: " + key.get() + ", val: " + value + ", pos: " + position);
        }
        reader.close();
    }
}
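
merge() assumes every input file is already sorted by key, which holds here because both writers emit keys in ascending order; the merged output therefore contains the keys 1 through 200 in order. As a sketch, a verification method (not part of the original class, names are illustrative) could be added to TestSeqFileMerge:

    /**
     * Sketch of a verification test: assert that merge.seq contains
     * the keys 1..200 in ascending order.
     */
    @Test
    public void testMergeIsSorted() throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);

        SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("E:/test/merge.seq"), conf);
        IntWritable key = new IntWritable();
        Text value = new Text();

        int expected = 1;
        while (reader.next(key, value)) {
            // block1.seq (1-100) and block2.seq (101-200) merge into 1-200
            org.junit.Assert.assertEquals(expected++, key.get());
        }
        // 200 records in total
        org.junit.Assert.assertEquals(201, expected);
        reader.close();
    }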

4. Testing Converting a Log File to a SequenceFile

package hadoop.sequencefile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

import java.io.BufferedReader;
import java.io.FileReader;

/**
 * Convert a log file into a SequenceFile.
 * To view the compressed SequenceFile on Windows:
 * hdfs dfs -text file:///E:/test/access.seq
 */
public class Log2Seq {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Set the file system to local mode
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);

        Path path = new Path("E:/test/access.seq");

        // No compression
//        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, NullWritable.class, Text.class, SequenceFile.CompressionType.NONE);
        // Record compression
//        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, NullWritable.class, Text.class, SequenceFile.CompressionType.RECORD);
        // Block compression
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, NullWritable.class, Text.class, SequenceFile.CompressionType.BLOCK);

        BufferedReader br = new BufferedReader(new FileReader("E:/file/access.log1"));
        String line = null;
        while ((line = br.readLine()) != null) {
            // Each log line becomes the value; the key carries no information
            NullWritable key = NullWritable.get();
            Text value = new Text(line);
            writer.append(key, value);
        }
        br.close();
        writer.close();
    }
}
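
Because the keys here are NullWritable, reading the file back in Java differs slightly from the IntWritable examples: next() is still given a key object, but NullWritable is a stateless singleton obtained via NullWritable.get(). A minimal sketch, assuming the access.seq written above (the class name ReadSeqLog is illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class ReadSeqLog {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);

        SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("E:/test/access.seq"), conf);

        // NullWritable carries no state, so the singleton is passed to next()
        NullWritable key = NullWritable.get();
        Text value = new Text();
        while (reader.next(key, value)) {
            System.out.println(value);
        }
        reader.close();
    }
}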


Reposted from: https://www.cnblogs.com/share23/p/9887854.html
