Scala题目
数据文件位于 bigdata19-scala/data 目录下(students.txt、score.txt、subject.txt)
题目:
基础
1、统计班级人数 [班级,人数]
2、统计学生的总分 [学号,学生姓名,学生年龄,总分]
进阶
1、统计年级排名前十学生各科的分数 [学号,姓名,班级,科目,分数]
2、统计总分大于年级平均分的学生 [学号,姓名,班级,总分]
3、统计每科都及格的学生 [学号,姓名,班级,科目,分数]
4、统计每个班级的前三名 [学号,姓名,班级,分数]
5、统计偏科最严重的前100名学生 [学号,姓名,班级,科目,分数]
package homeWork

import org.junit.{Before, Test}

import scala.io.{BufferedSource, Source}

/**
 * Reference solutions for the Scala collection exercises.
 *
 * Every `@Test` method answers one question by combining the three data sets
 * loaded in [[loadData]] (students, scores, subjects) and printing the result
 * to standard output, one record per line.
 */
class teacherAnswer {
  var stu: List[Students] = _
  var sco: List[Score] = _
  var sub: List[Subject] = _
  // student id -> "name,clazz"
  var stuInfoMap: Map[String, String] = _
  // subject id -> subject name
  var subNameMap: Map[String, String] = _
  // subject id -> full mark of the subject
  var subScoreMap: Map[String, Int] = _

  // Case classes for one row of the student, score and subject data respectively.
  case class Students(id: String, name: String, age: Int, gender: String, clazz: String)

  case class Score(id: String, subject_id: String, score: Int)

  case class Subject(subject_id: String, subject_name: String, subject_score: Int)

  /**
   * Prints every score record that belongs to one of the given students as
   * `id,name,clazz,subjectName,score`.
   *
   * Records are printed in the order they appear in `sco`. Unknown students or
   * subjects are printed with a placeholder instead of being dropped.
   *
   * @param ids student ids whose score records should be printed
   */
  def printAndFilterWithIds(ids: List[String]): Unit = {
    // A Set gives constant-time membership tests; List.contains would rescan
    // the whole id list for every score record.
    val wanted: Set[String] = ids.toSet
    sco
      .filter(s => wanted.contains(s.id))
      .map { s =>
        val nameAndClazz: String = stuInfoMap.getOrElse(s.id, "无此学生")
        val subjectName: String = subNameMap.getOrElse(s.subject_id, "无此科目")
        s"${s.id},$nameAndClazz,$subjectName,${s.score}"
      }
      .foreach(println)
  }

  /** Reads all non-blank lines of `path`, closing the file even when reading fails. */
  private def readLines(path: String): List[String] = {
    val source: BufferedSource = Source.fromFile(path)
    try {
      // toList forces the lazy iterator before the source is closed.
      source.getLines().filter(_.trim.nonEmpty).toList
    } finally {
      source.close()
    }
  }

  /** Sums the scores of every student that has at least one score record: id -> total. */
  private def totalScoreById: Map[String, Int] =
    sco
      .groupBy(_.id)
      .map { case (id, records) => (id, records.map(_.score).sum) }

  /**
   * Loads the three comma-separated data files into `stu`, `sco` and `sub`
   * and builds the lookup maps derived from them. Runs before every test.
   */
  @Before
  def loadData(): Unit = {
    stu = readLines("data/students.txt").map { line =>
      val splits = line.split(",")
      Students(splits(0), splits(1), splits(2).toInt, splits(3), splits(4))
    }

    // Key: student id, value: "name,clazz".
    stuInfoMap = stu.map(s => (s.id, s"${s.name},${s.clazz}")).toMap

    sco = readLines("data/score.txt").map { line =>
      val splits = line.split(",")
      Score(splits(0), splits(1), splits(2).toInt)
    }

    sub = readLines("data/subject.txt").map { line =>
      val splits = line.split(",")
      Subject(splits(0), splits(1), splits(2).toInt)
    }

    // Key: subject id, value: subject name.
    subNameMap = sub.map(s => (s.subject_id, s.subject_name)).toMap

    // Key: subject id, value: full mark of the subject.
    subScoreMap = sub.map(s => (s.subject_id, s.subject_score)).toMap
  }

  /** Prints the first ten records of each loaded data set. */
  @Test
  def printData(): Unit = {
    stu.take(10).foreach(println)
    sco.take(10).foreach(println)
    sub.take(10).foreach(println)
  }

  /** Basic 1: number of students per class, printed as `clazz,count`. */
  @Test
  def clazzCount(): Unit = {
    stu
      // groupBy on a List yields a Map from class name to its students.
      .groupBy(_.clazz)
      .map { case (clazz, members) => s"$clazz,${members.size}" }
      .foreach(println)
  }

  /**
   * Basic 2: total score of every student, printed as `id,name,age,total`.
   * Students without any score record are printed with a total of 0.
   */
  @Test
  def stuSumScore(): Unit = {
    val totals: Map[String, Int] = totalScoreById

    stu
      .map { s =>
        val sumScore: Int = totals.getOrElse(s.id, 0)
        s"${s.id},${s.name},${s.age},$sumScore"
      }
      .foreach(println)
  }

  /**
   * Advanced 1: per-subject scores of the ten students with the highest total,
   * printed as `id,name,clazz,subject,score`.
   */
  @Test
  def question01(): Unit = {
    val top10Ids: List[String] = totalScoreById
      .toList
      .sortBy { case (_, total) => -total }
      .take(10)
      .map(_._1)

    printAndFilterWithIds(top10Ids)
  }

  /**
   * Advanced 2: students whose total is above the grade average,
   * printed as `id,name,clazz,total`.
   */
  @Test
  def question02(): Unit = {
    // Grade average = sum of all scores divided by the number of students on the roster.
    val avgSumScore: Double = sco.map(_.score).sum / stu.size.toDouble

    totalScoreById
      .filter { case (_, total) => total > avgSumScore }
      .map { case (id, total) =>
        val nameAndClazz: String = stuInfoMap.getOrElse(id, "无此学生信息")
        s"$id,$nameAndClazz,$total"
      }
      .foreach(println)
  }

  /**
   * Advanced 3: students who passed every subject,
   * printed as `id,name,clazz,subject,score`.
   *
   * A subject is passed with at least 60% of its full mark. A score whose
   * subject is not in the subject data never counts as a pass.
   */
  @Test
  def question03(): Unit = {
    // Derived from the loaded subjects rather than hard-coded.
    val subjectCount: Int = subScoreMap.size

    val ids: List[String] = sco
      .filter(s => subScoreMap.get(s.subject_id).exists(full => s.score >= full * 0.6))
      .groupBy(_.id)
      // Count distinct subjects so duplicate records cannot inflate the tally.
      .map { case (id, passed) => (id, passed.map(_.subject_id).toSet.size) }
      .filter { case (_, passedCount) => passedCount == subjectCount }
      .keys
      .toList

    printAndFilterWithIds(ids)
  }

  /**
   * Advanced 4: the three students with the highest total in every class,
   * printed as `id,name,clazz,total`.
   *
   * Score records whose student id has no student information are skipped,
   * because they cannot be assigned to a class.
   */
  @Test
  def question04(): Unit = {
    totalScoreById
      .toList
      .flatMap { case (id, total) =>
        // stuInfoMap values have the form "name,clazz".
        stuInfoMap.get(id).map(_.split(",")).collect {
          case Array(name, clazz) => (id, name, clazz, total)
        }
      }
      .groupBy(_._3)
      .values
      .flatMap(_.sortBy(-_._4).take(3))
      .map { case (id, name, clazz, total) => s"$id,$name,$clazz,$total" }
      .foreach(println)
  }

  /**
   * Advanced 5: the 100 students with the most uneven results across subjects,
   * printed as `id,name,clazz,subject,score`.
   *
   * Each score is normalised to a percentage of the subject's full mark, and
   * students are ranked by the population variance of those percentages.
   * Scores whose subject is unknown or has a non-positive full mark are
   * ignored, since they cannot be normalised.
   */
  @Test
  def question05(): Unit = {
    val ids: List[String] = sco
      .flatMap { s =>
        subScoreMap
          .get(s.subject_id)
          .filter(_ > 0)
          .map(full => (s.id, s.score * 100.0 / full))
      }
      .groupBy(_._1)
      .map { case (id, normalised) =>
        val percents: List[Double] = normalised.map(_._2)
        val mean: Double = percents.sum / percents.size
        val variance: Double = percents.map(p => math.pow(p - mean, 2)).sum / percents.size
        (id, variance)
      }
      .toList
      // Highest variance first.
      .sortWith(_._2 > _._2)
      .take(100)
      .map(_._1)

    printAndFilterWithIds(ids)
  }
}