今天面试被虐。。。好久没写MapReduce，忘记了。额额。。。
import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.net.URI; import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.math3.stat.descriptive.SummaryStatistics; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; /** * * * File Name: PageVisitAge_Avg.java * * General Description: Copyright and file header. 
* * Revision History: * Modification * Author Date(MM/DD/YYYY) JiraID Description of Changes * --------------------- ------------ ---------- ----------------------------- * @author Bill Zhang 2017年9月6日 * */ public class PageVisitAge_Avg extends Configured implements Tool{ static class PageVisitMapper extends Mapper<LongWritable,Text,Text,Text>{ @Override protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException { String line = value.toString(); String url = line.split(",")[1]; context.write(new Text(url), value); } } static class PageVisitReducer extends Reducer<Text, Text, Text, Text>{ private ConcurrentHashMap<String, String> dictTable = new ConcurrentHashMap<String, String>(); @Override protected void setup(Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException { URI[] path_arr = context.getCacheFiles(); if (path_arr.length == 0) { throw new FileNotFoundException("Distributed cache file not found."); } URI dict_uri = path_arr[0]; FileSystem fs = FileSystem.get(context.getConfiguration()); FSDataInputStream in = fs.open(new Path(dict_uri)); BufferedReader br = new BufferedReader(new InputStreamReader(in)); String user_dict = null; while ((user_dict = br.readLine()) != null) { String[] records = user_dict.split(","); //加载字典表 dictTable.put(records[0], user_dict); } } @Override protected void reduce(Text arg0, Iterable<Text> arg1, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException { SummaryStatistics statistics = new SummaryStatistics (); //计算访问访问page的平均年龄 for(Text value : arg1){ String id = value.toString().split(",")[0]; String userInfo = dictTable.get(id); String age = userInfo.split(",")[2]; statistics.addValue(Double.valueOf(age)); System.out.println(value); } double avg = statistics.getMean(); context.write(arg0, new Text(avg+"")); } } @Override public int run(String[] args) throws Exception { 
Configuration conf =getConf(); conf.set("mapreduce.input.fileinputformat.input.dir.recursive", "true"); Job job = Job.getInstance(conf,PageVisitAge_Avg.class.getSimpleName()); job.setJarByClass(getClass()); /** * 用户访问的网页历史 * 例如: * 1,http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html * 2,http://docs.oracle.com/javase/8/ * 3,http://docs.oracle.com/javase/8/docs/api/index.html */ FileInputFormat.addInputPath(job,new Path(args[0])); /** * 用户字典 * 例如: * 1,zhangsan,20,male * 2,zhangsi,23,female * 3,susan,30,male * */ job.addCacheFile(new URI(args[1])); FileOutputFormat.setOutputPath(job,new Path(args[2])); job.setMapOutputKeyClass(Text.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(PageVisitMapper.class); job.setReducerClass(PageVisitReducer.class); job.setNumReduceTasks(10); return job.waitForCompletion(true)?0:1; } /** * * @param args * @throws Exception */ public static void main(String [] args ) throws Exception{ int ec = ToolRunner.run(new Configuration(), new PageVisitAge_Avg(), args); System.exit(ec); } }
相关推荐
Google,Baidu,腾讯近期面试题总结,侧重于海量数据的处理方法,面试利器,实用有效!
腾讯面试试题,38页,希望对求职人士有用
前端大厂最新面试题-tencent
Tencent笔试题收集,主要是针对近期的试题的汇总......
包含了十多套腾讯笔试题,主要是近两年各个地方的。
com.tencent.ig.zip
leetcode面试-腾讯专题做题记录 LeetCode腾讯专题地址, 小目标 每天至少一道算法题 每天更新README学习markdown写法(不会真的有人不会markdown吧,不会吧,不会吧) 先这样,有什么想到的再说 1月20日 正式开始,...
_storage_emulated_0_android_data_com.tencent.mobileqq_Tencent_QQfile_recv_.TbsReaderTempcom.tencent.mobileqq_83ca8a5fc862397bd03b8fa8d9944926.dat
一部分的tencent笔试题,找工作有一定价值
tencentcloud-sdk-java-3.1.270
main.16490.com.tencent.ig.obb
com.tencent.mm
Tencent性能测试工具GT
com.tencent.mtt.apkplugin.ipai12777.zip
main.16705.com.tencent.ig.obb
main.16705.com.tencent.ig(1).zip
使用STM32CubeIDE1.0.2创建的基于STM32F103RB mcu的工程,移植TencentOS-Tiny。在板子上运行成功。基于这个压缩包,后面很好修改对应的代码。 Hello world! ###This is task1 ,count is 1 Hello TencentOS ! ...
_storage_emulated_0_android_data_com.tencent.mobileqq_Tencent_QQfile_recv_实训四.zip
TencentOS tiny是腾讯面向物联网领域开发的实时操作系统,具有低功耗,低资源占用,模块化,安全可靠等特点,可有效提升物联网终端产品开发效率。TencentOS tiny 提供精简的 RTOS 内核,内核组件可裁剪可配置,可...
com.tencent.mtt.plugin.imsdk4.zip