map map

下面的无法运行。
/**
 * Parses one tab-separated input line of the form {@code class \t word \t count}
 * and writes, through {@code mos}, the add-one smoothed probability
 * {@code (count + 1) / (wordsInClass + total)} for each word held in the
 * per-call map.
 *
 * <p>Fixes over the previous revision:
 * <ul>
 *   <li>The list of words was walked with a single {@code Iterator} that was
 *       already exhausted by the summing loop, so the probability loop never
 *       executed. The list is now traversed twice with for-each loops.</li>
 *   <li>The probability is computed in floating point ({@code + 1.0}) so an
 *       integral denominator no longer truncates the result.</li>
 * </ul>
 *
 * <p>NOTE: {@code baseMap} is created anew on every call, so the "words in
 * class" total only covers the single record carried by {@code value}.
 *
 * @param key     input key supplied by the framework (not used here)
 * @param value   one input line; lines that do not split into exactly three
 *                tab-separated fields produce no output
 * @param context task context; the {@code TOTALWORDS} configuration entry is read from it
 * @throws IOException          propagated from the output call
 * @throws InterruptedException propagated from the output call
 * @throws NumberFormatException if {@code TOTALWORDS} or the count field is not an integer
 */
@Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, DoubleWritable>.Context context)
                        throws IOException, InterruptedException {
            Configuration conf = context.getConfiguration();
            int tot = Integer.parseInt(conf.get("TOTALWORDS"));

            System.out.println("total === " + total);
            System.out.println("tot = " + tot);

            // Expected input, tab-separated (class, word, count), e.g.:
            //   ALB weekend 1
            //   ALB weeks 3
            // baseMap: class name -> list of "word\tcount" strings.
            Map<String, List<String>> baseMap = new HashMap<String, List<String>>();

            String[] temp = value.toString().split("\t");
            if (temp.length == 3) {
                // temp[0] is the class (folder) name.
                String wordAndNumber = temp[1] + "\t" + temp[2];
                List<String> wordsOfClass = baseMap.get(temp[0]);
                if (wordsOfClass == null) {
                    wordsOfClass = new ArrayList<String>();
                    baseMap.put(temp[0], wordsOfClass);
                }
                wordsOfClass.add(wordAndNumber);
            }

            // For every class: first total the counts, then emit each word's probability.
            for (Map.Entry<String, List<String>> classEntry : baseMap.entrySet()) {
                List<String> wordsOfClass = classEntry.getValue();

                // Pass 1: total number of words in this class.
                int allWordsInClass = 0;
                for (String wordAndCount : wordsOfClass) {
                    String[] parts = wordAndCount.split("\t");
                    allWordsInClass += Integer.parseInt(parts[1]);
                }
                System.out.println(allWordsInClass);

                // Pass 2: a fresh traversal of the list (the old code reused a spent iterator).
                for (String wordAndCount : wordsOfClass) {
                    String[] parts = wordAndCount.split("\t");
                    // 1.0 forces floating-point division.
                    // NOTE(review): the denominator uses the field `total`, while `tot` read
                    // from the configuration above is only printed — confirm which is intended.
                    double p = (Integer.parseInt(parts[1]) + 1.0) / (allWordsInClass + total);
                    String classAndWord = classEntry.getKey() + "\t" + parts[0];
                    className.set(classAndWord);
                    number.set(p);
                    LOG.info("------>p = " + p);
                    mos.write(classEntry.getKey(), parts[0], p);
                }
            }
        }

 

/**
 * Parses one tab-separated input line of the form {@code class \t word \t count}
 * and emits {@code ("class\tword", p)} where
 * {@code p = (count + 1.0) / (wordsInClass + tot)} and {@code tot} is the
 * integer stored under {@code TOTALWORDS} in the job configuration.
 *
 * <p>NOTE: {@code baseMap} is rebuilt on every call, so it holds at most the
 * single record carried by {@code value}; the printed "words in class" total
 * therefore equals that record's own count. Totals spanning several records
 * would need state kept outside this method.
 *
 * <p>The previously populated but never read {@code priorMap}/{@code pMap}
 * locals have been removed; the emitted records are unchanged.
 *
 * @param key     input key supplied by the framework (not used here)
 * @param value   one input line; lines that do not split into exactly three
 *                tab-separated fields produce no output
 * @param context task context used to read the configuration and write output
 * @throws IOException          propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 * @throws NumberFormatException if {@code TOTALWORDS} or the count field is not an integer
 */
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, DoubleWritable>.Context context)
                        throws IOException, InterruptedException {
            Configuration conf = context.getConfiguration();
            int tot = Integer.parseInt(conf.get("TOTALWORDS"));

            System.out.println("total === " + total);
            System.out.println("tot = " + tot);

            // Expected input, tab-separated (class, word, count), e.g.:
            //   ALB weekend 1
            //   ALB weeks 3
            // baseMap: class name -> (word -> count).
            Map<String, Map<String, Integer>> baseMap = new HashMap<String, Map<String, Integer>>();

            String[] temp = value.toString().split("\t");
            if (temp.length == 3) {
                // temp[0] is the class (folder) name.
                Map<String, Integer> wordCounts = baseMap.get(temp[0]);
                if (wordCounts == null) {
                    wordCounts = new HashMap<String, Integer>();
                    baseMap.put(temp[0], wordCounts);
                }
                wordCounts.put(temp[1], Integer.parseInt(temp[2]));
            }

            // For every class: total the counts, then emit each word's probability.
            for (Map.Entry<String, Map<String, Integer>> classEntry : baseMap.entrySet()) {
                Map<String, Integer> wordCounts = classEntry.getValue();

                int allWordsInClass = 0;
                for (Integer count : wordCounts.values()) {
                    allWordsInClass += count;
                }
                System.out.println(allWordsInClass);

                for (Map.Entry<String, Integer> wordEntry : wordCounts.entrySet()) {
                    // 1.0 keeps the division in floating point.
                    double p = (wordEntry.getValue() + 1.0) / (allWordsInClass + tot);
                    className.set(classEntry.getKey() + "\t" + wordEntry.getKey());
                    number.set(p);
                    LOG.info("------>p = " + p);

                    context.write(className, number);
                }
            }
        }

 

posted on 2015-11-06 10:12  IT小不点  阅读(399)  评论(0)  编辑  收藏  举报