`
shoppingbill
  • 浏览: 58183 次
  • 性别: Icon_minigender_1
  • 来自: 上海
社区版块
存档分类
最新评论

鹅厂Tencent面试题

阅读更多

今天面试被虐。。。好久没写MapReduce，忘记了。额额。。。

 

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * MapReduce job that computes, for every visited page URL, the average age of
 * the users who visited it.
 *
 * <p>Inputs, as documented by the sample data in {@link #run(String[])}:
 * <ul>
 *   <li>{@code args[0]} - page-visit history, one {@code userId,url} record per line;</li>
 *   <li>{@code args[1]} - user dictionary, one {@code userId,name,age,gender} record
 *       per line, shipped to the reducers through the distributed cache;</li>
 *   <li>{@code args[2]} - output directory; one record per distinct URL with its
 *       average visitor age.</li>
 * </ul>
 *
 * <p>Malformed visit records, visits by users missing from the dictionary and
 * dictionary rows without a numeric age are skipped and counted in job counters
 * instead of failing the whole task.
 *
 * <p>File name: PageVisitAge_Avg.java
 *
 * @author Bill Zhang (2017-09-06)
 */
public class PageVisitAge_Avg extends Configured implements Tool {

    /** Counter group under which skipped records are reported. */
    private static final String COUNTER_GROUP = "PageVisitAge_Avg";
    private static final String MALFORMED_VISITS = "MALFORMED_VISIT_RECORDS";
    private static final String UNKNOWN_USERS = "VISITS_BY_UNKNOWN_USER";
    private static final String INVALID_AGES = "USERS_WITH_INVALID_AGE";

    /** Column positions inside the comma-separated records. */
    private static final int VISIT_USER_ID_COLUMN = 0;
    private static final int VISIT_URL_COLUMN = 1;
    private static final int DICT_USER_ID_COLUMN = 0;
    private static final int DICT_AGE_COLUMN = 2;

    /** Number of reduce tasks requested for the job. */
    private static final int NUM_REDUCE_TASKS = 10;

    /**
     * Returns the {@code index}-th comma-separated column of {@code record}.
     *
     * @param record comma-separated line, may be {@code null}
     * @param index  zero-based column index
     * @return the column text, or {@code null} when the record is {@code null}
     *         or has no such column
     */
    private static String column(String record, int index) {
        if (record == null) {
            return null;
        }
        // String.split drops trailing empty columns, so "," yields an empty array.
        String[] columns = record.split(",");
        return index < columns.length ? columns[index] : null;
    }

    /**
     * Re-keys every visit record by its URL so that all visits to the same page
     * meet in one reduce call. The full original line is forwarded as the value.
     */
    static class PageVisitMapper extends Mapper<LongWritable, Text, Text, Text> {

        /**
         * Emits {@code (url, originalLine)} for each well-formed visit record.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one {@code userId,url} line
         * @param context task context used to emit output and counters
         */
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {

            String url = column(value.toString(), VISIT_URL_COLUMN);
            if (url == null) {
                // Blank line or record without a URL column: skip instead of
                // failing the task with an ArrayIndexOutOfBoundsException.
                context.getCounter(COUNTER_GROUP, MALFORMED_VISITS).increment(1);
                return;
            }

            context.write(new Text(url), value);
        }

    }

    /**
     * Joins each visit with the user dictionary loaded from the distributed
     * cache and writes the mean visitor age per URL.
     */
    static class PageVisitReducer extends Reducer<Text, Text, Text, Text> {

        /** User dictionary: user id -> full dictionary line for that user. */
        private final ConcurrentHashMap<String, String> dictTable = new ConcurrentHashMap<String, String>();

        /**
         * Loads the first distributed-cache file into {@link #dictTable}.
         *
         * @throws FileNotFoundException when no cache file was registered for the job
         * @throws IOException           when the dictionary cannot be read
         */
        @Override
        protected void setup(Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
            URI[] cacheFiles = context.getCacheFiles();
            // Guard against a missing array as well as an empty one.
            if (cacheFiles == null || cacheFiles.length == 0) {
                throw new FileNotFoundException("Distributed cache file not found.");
            }

            URI dictUri = cacheFiles[0];
            // Resolve the file system from the URI itself so a dictionary that
            // does not live on the default file system can still be opened.
            // The FileSystem object is deliberately not closed here; only the
            // stream opened by this method is.
            FileSystem fs = FileSystem.get(dictUri, context.getConfiguration());

            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(fs.open(new Path(dictUri)), StandardCharsets.UTF_8))) {
                String userRecord;
                while ((userRecord = reader.readLine()) != null) {
                    String userId = column(userRecord, DICT_USER_ID_COLUMN);
                    if (userId == null || userId.isEmpty()) {
                        continue; // blank or id-less dictionary line
                    }
                    // Load the dictionary table.
                    dictTable.put(userId, userRecord);
                }
            }
        }

        /**
         * Computes the average age of the users who visited the page.
         *
         * @param arg0    page URL
         * @param arg1    the visit records ({@code userId,url}) for that URL
         * @param context task context used to emit {@code (url, averageAge)}
         */
        @Override
        protected void reduce(Text arg0, Iterable<Text> arg1, Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {

            SummaryStatistics ages = new SummaryStatistics();

            for (Text visit : arg1) {
                String userId = column(visit.toString(), VISIT_USER_ID_COLUMN);
                String userRecord = (userId == null) ? null : dictTable.get(userId);
                if (userRecord == null) {
                    // Visitor is not in the dictionary; nothing to average.
                    context.getCounter(COUNTER_GROUP, UNKNOWN_USERS).increment(1);
                    continue;
                }

                String age = column(userRecord, DICT_AGE_COLUMN);
                if (age == null) {
                    context.getCounter(COUNTER_GROUP, INVALID_AGES).increment(1);
                    continue;
                }
                try {
                    ages.addValue(Double.parseDouble(age));
                } catch (NumberFormatException e) {
                    context.getCounter(COUNTER_GROUP, INVALID_AGES).increment(1);
                }
            }

            if (ages.getN() == 0) {
                // Every visit was skipped; do not emit a meaningless NaN average.
                return;
            }

            context.write(arg0, new Text(Double.toString(ages.getMean())));
        }

    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code [visitHistoryPath, userDictionaryUri, outputPath]}
     * @return 0 on success, 1 when the job fails, 2 when the arguments are missing
     * @throws Exception when job setup or submission fails
     */
    @Override
    public int run(String[] args) throws Exception {

        if (args == null || args.length < 3) {
            System.err.println("Usage: " + PageVisitAge_Avg.class.getSimpleName()
                    + " <visit-history path> <user-dictionary URI> <output path>");
            return 2;
        }

        Configuration conf = getConf();
        conf.set("mapreduce.input.fileinputformat.input.dir.recursive", "true");

        Job job = Job.getInstance(conf, PageVisitAge_Avg.class.getSimpleName());
        job.setJarByClass(getClass());

        /*
         * Page-visit history of the users, for example:
         *   1,http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html
         *   2,http://docs.oracle.com/javase/8/
         *   3,http://docs.oracle.com/javase/8/docs/api/index.html
         */
        FileInputFormat.addInputPath(job, new Path(args[0]));

        /*
         * User dictionary, for example:
         *   1,zhangsan,20,male
         *   2,zhangsi,23,female
         *   3,susan,30,male
         */
        job.addCacheFile(new URI(args[1]));

        FileOutputFormat.setOutputPath(job, new Path(args[2]));

        job.setMapperClass(PageVisitMapper.class);
        job.setReducerClass(PageVisitReducer.class);

        // Declare every key/value type explicitly rather than relying on defaults.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setNumReduceTasks(NUM_REDUCE_TASKS);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point; exits the JVM with the job's exit code.
     *
     * @param args see {@link #run(String[])}
     * @throws Exception when the job cannot be run
     */
    public static void main(String[] args) throws Exception {
        int ec = ToolRunner.run(new Configuration(), new PageVisitAge_Avg(), args);
        System.exit(ec);
    }

}

 

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics