//配置作业各个类
job.setJarByClass(Duplicate.class); job.setMapperClass(Map.class);
job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1);
} }
5.排序
问题描述
将给定的一组数据按升序进行排序,并给出每个数字的次序
解决方案
使用MapReduce默认的排序规则,对于IntWritable类型的数据按照key值大小进行排序
测试数据
输入: in1.txt: 9 0 14 999 15 88 9
in2.txt: 65 54 32 21
10
in3.txt: 1 0 9 21 8
预期结果:
1 0 1 0 2 1 3 8 4 9 4 9 4 9 5 10 6 14 7 15 8 21 8 21 9 32 10 54 11 65 12 88 13 999
代码
package train;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser;
import train.Duplicate.Map; import train.Duplicate.Reduce; /**
* 升序排序(使用mapreduce提供的默认排序规则) * 对于IntWritable类型的数据,按key值大小进行排序 * @author hadoop * */
public class Sort {
//将输入数据的value装换为int类型并作为key输出
public static class Map extends Mapper
public void map(Object key,Text value,Context context) throws IOException, InterruptedException{
String line = value.toString(); numble.set(Integer.parseInt(line)); context.write(numble, one); } }
//全局num确定每个数字的顺序位次 //遍历values来确定每个数字输出的次数 public static class Reduce extends Reducer
public void reduce(IntWritable key,Iterable
//System.out.println(key+\ \
for(IntWritable value:values){ context.write(num, key);
System.out.println(key+\ }
num = new IntWritable(num.get()+1); } }
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
conf.set(\ args = new String[]{\put/sort_out\
//检查运行命令
String[] otherArgs = new GenericOptionsParser(conf,args).getRemainingArgs(); if(otherArgs.length != 2){
System.err.println(\ System.exit(2); }
//配置作业名
Job job = new Job(conf,\ //配置作业各个类
job.setJarByClass(Sort.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class);
job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1);
} }
注意,这个代码不需要设置combiner,否则结果会不一致,因为会多一次合并
6.倒排索引
问题描述
有多条数据,对数据按照属性值进行分组,比如对于多条语句,按所含的单词进行分组
测试数据
输入: in1.txt
Life is brief , and then you die, you know ?
in2.txt:
Innovation distinguishes between a leader and a follower
in3.txt
We're here to put a dent in the universe . Otherwise why else even be here ?
预期结果:
, in1.txt:1; . in3.txt:1; ? in3.txt:1;
Innovation in2.txt:1; Life in1.txt:1; Otherwise in3.txt:1; We're in3.txt:1; a in3.txt:1;in2.txt:2; and in2.txt:1;in1.txt:1; be in3.txt:1; between in2.txt:1; brief in1.txt:1; dent in3.txt:1; die, in1.txt:1;
distinguishes in2.txt:1; else in3.txt:1; even in3.txt:1; follower in2.txt:1; here in3.txt:2; in in3.txt:1; is in1.txt:1;
know in1.txt:1; leader in2.txt:1; put in3.txt:1; the in3.txt:1;
then in1.txt:1; to in3.txt:1;
百度搜索“77cn”或“免费范文网”即可找到本站免费阅读全部范文。收藏本站方便下次阅读,免费范文网,提供经典小说综合文库Hadoop那些事儿(四) - MapReduce编程实例(基础)(4)在线全文阅读。
相关推荐: