Basic usage of FileSplit

FileSplit class hierarchy: the new-API org.apache.hadoop.mapreduce.lib.input.FileSplit extends InputSplit and implements Writable.

Key fields and methods of FileSplit:
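As a quick reference, here is a compilable sketch of the members this post exercises (consult your Hadoop version's Javadoc for the full list):

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.SplitLocationInfo;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// Compile-time tour of the FileSplit members used in this post.
public class FileSplitMembers {
    static void members(FileSplit split) throws IOException {
        Path path = split.getPath();            // file this split belongs to
        long start = split.getStart();          // offset of the first byte to process
        long length = split.getLength();        // number of bytes in the split
        String[] hosts = split.getLocations();  // hosts holding the split's data
        SplitLocationInfo[] info = split.getLocationInfo(); // may be null (see output below)
    }
}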

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.SplitLocationInfo;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class FileSplitTest {
    public static void main(String[] args) throws IOException {
        // The no-arg constructor exists for deserialization; build the split
        // explicitly so the getters have something to return.
        FileSplit fs = new FileSplit(new Path("/user/hadoop/libin/input/inputpath1.txt"),
                0, 58, new String[]{"hadoop"});
        String pathname = fs.getPath().getName();   // file name of the split's path
        int depth = fs.getPath().depth();           // number of path components
        Class<? extends FileSplit> cls = fs.getClass();          // runtime class
        long length = fs.getLength();               // split length in bytes
        SplitLocationInfo[] locationInfo = fs.getLocationInfo(); // may be null
        String[] locations = fs.getLocations();     // hosts holding the data
    }
}

Job input:

hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/input/inputpath1.txt  
hadoop  a  
spark   a  
hive    a  
hbase   a  
tachyon a  
storm   a  
redis   a  
hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/input/inputpath2.txt  
hadoop  b  
spark   b  
kafka   b  
tachyon b  
oozie   b  
flume   b  
sqoop   b  
solr    b  
hadoop@hadoop:/home/hadoop/blb$
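Both files are well under one HDFS block, so TextInputFormat creates one split per file and the job below runs two map tasks (hence the two part-m output files later on). A minimal sketch that lists those splits without running a job, assuming the cluster configuration is on the classpath:

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

// Lists the splits TextInputFormat would hand to the mappers.
public class ListSplits {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        FileInputFormat.addInputPath(job, new Path("/user/hadoop/libin/input"));
        List<InputSplit> splits = new TextInputFormat().getSplits(job);
        for (InputSplit split : splits) {
            System.out.println(split); // e.g. .../inputpath1.txt:0+58
        }
    }
}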
The complete map-only job below retrieves the FileSplit from the mapper's context and writes each of its fields once per input record:

import java.io.IOException;


import org.apache.hadoop.conf.Configuration;  
import org.apache.hadoop.fs.Path;  
import org.apache.hadoop.io.LongWritable;  
import org.apache.hadoop.io.NullWritable;  
import org.apache.hadoop.io.Text;  
import org.apache.hadoop.mapred.SplitLocationInfo;  
import org.apache.hadoop.mapreduce.Job;  
import org.apache.hadoop.mapreduce.Mapper;  
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;  
import org.apache.hadoop.mapreduce.lib.input.FileSplit;  
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;  
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;  
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;  
import org.apache.hadoop.util.GenericOptionsParser;  


public class GetSplitMapReduce {  
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {  
        Configuration conf = new Configuration();  
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();  
        if (otherArgs.length != 2) {
            System.err.println("Usage: GetSplitMapReduce <inputpath> <outputpath>");
            System.exit(2);
        }


        Job job = Job.getInstance(conf, GetSplitMapReduce.class.getSimpleName() + "1");  
        job.setJarByClass(GetSplitMapReduce.class);  
        job.setMapOutputKeyClass(Text.class);  
        job.setMapOutputValueClass(Text.class);  
        job.setOutputKeyClass(Text.class);  
        job.setOutputValueClass(NullWritable.class);  
        job.setMapperClass(MyMapper1.class);  
        job.setNumReduceTasks(0);  
        job.setInputFormatClass(TextInputFormat.class);  
        job.setOutputFormatClass(TextOutputFormat.class);  
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));  
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));  
        job.waitForCompletion(true);  
    }  
    public static class MyMapper1 extends Mapper<LongWritable, Text, Text, NullWritable>{  
        @Override  
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, NullWritable>.Context context)  
                throws IOException, InterruptedException {  

            FileSplit fileSplit = (FileSplit) context.getInputSplit(); // the split this mapper is processing
            String pathname = fileSplit.getPath().getName();  // file name of the split's path
            int depth = fileSplit.getPath().depth();          // number of path components
            Class<? extends FileSplit> class1 = fileSplit.getClass(); // runtime class
            long length = fileSplit.getLength();              // split length in bytes
            SplitLocationInfo[] locationInfo = fileSplit.getLocationInfo(); // may be null
            String[] locations = fileSplit.getLocations();    // hosts holding the data
            long start = fileSplit.getStart();                // position of the first byte in the file to process
            String string = fileSplit.toString();             // path:start+length

            context.write(new Text("===================================================================================="), NullWritable.get());  
            context.write(new Text("pathname--"+pathname), NullWritable.get());  
            context.write(new Text("depth--"+depth), NullWritable.get());  
            context.write(new Text("class1--"+class1), NullWritable.get());  
            context.write(new Text("length--"+length), NullWritable.get());  
            context.write(new Text("locationInfo--"+locationInfo), NullWritable.get());  
            context.write(new Text("locations--"+locations), NullWritable.get());  
            context.write(new Text("start--"+start), NullWritable.get());  
            context.write(new Text("string--"+string), NullWritable.get());  
        }  
    }  
}
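Packaged into a jar (the name getsplit.jar here is hypothetical), the job can be run as:

hadoop@hadoop:/home/hadoop/blb$ hadoop jar getsplit.jar GetSplitMapReduce /user/hadoop/libin/input /user/hadoop/libin/out2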

Output for inputpath2.txt:

hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/out2/part-m-00000  
====================================================================================  
pathname--inputpath2.txt  
depth--5  
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
length--66  
locationInfo--null  
locations--[Ljava.lang.String;@4ff41ba0  
start--0  
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66  
====================================================================================  
pathname--inputpath2.txt  
depth--5  
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
length--66  
locationInfo--null  
locations--[Ljava.lang.String;@2341ce62  
start--0  
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66  
====================================================================================  
pathname--inputpath2.txt  
depth--5  
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
length--66  
locationInfo--null  
locations--[Ljava.lang.String;@35549603  
start--0  
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66  
====================================================================================  
pathname--inputpath2.txt  
depth--5  
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
length--66  
locationInfo--null  
locations--[Ljava.lang.String;@4444ba4f  
start--0  
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66  
====================================================================================  
pathname--inputpath2.txt  
depth--5  
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
length--66  
locationInfo--null  
locations--[Ljava.lang.String;@7c23bb8c  
start--0  
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66  
====================================================================================  
pathname--inputpath2.txt  
depth--5  
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
length--66  
locationInfo--null  
locations--[Ljava.lang.String;@dee2400  
start--0  
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66  
====================================================================================  
pathname--inputpath2.txt  
depth--5  
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
length--66  
locationInfo--null  
locations--[Ljava.lang.String;@d7d8325  
start--0  
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66  
====================================================================================  
pathname--inputpath2.txt  
depth--5  
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
length--66  
locationInfo--null  
locations--[Ljava.lang.String;@2b2cf90e  
start--0  
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66

Output for inputpath1.txt:

hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/out2/part-m-00001  
====================================================================================  
pathname--inputpath1.txt  
depth--5  
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
length--58  
locationInfo--null  
locations--[Ljava.lang.String;@4ff41ba0  
start--0  
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58  
====================================================================================  
pathname--inputpath1.txt  
depth--5  
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
length--58  
locationInfo--null  
locations--[Ljava.lang.String;@2341ce62  
start--0  
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58  
====================================================================================  
pathname--inputpath1.txt  
depth--5  
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
length--58  
locationInfo--null  
locations--[Ljava.lang.String;@35549603  
start--0  
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58  
====================================================================================  
pathname--inputpath1.txt  
depth--5  
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
length--58  
locationInfo--null  
locations--[Ljava.lang.String;@4444ba4f  
start--0  
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58  
====================================================================================  
pathname--inputpath1.txt  
depth--5  
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
length--58  
locationInfo--null  
locations--[Ljava.lang.String;@7c23bb8c  
start--0  
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58  
====================================================================================  
pathname--inputpath1.txt  
depth--5  
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
length--58  
locationInfo--null  
locations--[Ljava.lang.String;@dee2400  
start--0  
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58  
====================================================================================  
pathname--inputpath1.txt  
depth--5  
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit  
length--58  
locationInfo--null  
locations--[Ljava.lang.String;@d7d8325  
start--0  
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58  
hadoop@hadoop:/home/hadoop/blb$
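Two details of this output deserve a note. locations--[Ljava.lang.String;@... is only the default toString() of a String array, not a hostname; and locationInfo is null because getLocationInfo() returns data only when the split was built with per-host information (newer Hadoop releases also offer a FileSplit constructor that takes in-memory hosts for this purpose). A minimal sketch showing how to print the array contents instead:

import java.util.Arrays;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// Shows why the job printed "[Ljava.lang.String;@...": String[] has no
// useful toString(); Arrays.toString renders the contents.
public class LocationsToString {
    public static void main(String[] args) throws Exception {
        FileSplit split = new FileSplit(
                new Path("/user/hadoop/libin/input/inputpath2.txt"), 0, 66,
                new String[]{"hadoop"});
        System.out.println(split.getLocations());                  // e.g. [Ljava.lang.String;@4ff41ba0
        System.out.println(Arrays.toString(split.getLocations())); // [hadoop]
    }
}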
