测试数据生成与自动化测试方法
数据生成
UNIT3的测试数据生成很简单,首先是随机数据的生成,我写了一个python脚本来完成。通过不同的参数设定,可以满足强测与互测的数据需求。
import os
import random
import re
import string
# Directory that genData() switches into before writing input.txt.
test_dir = "D:\\BUAA_study\\OO\\homework11\\test"

# Strong-test parameters.
# Total number of instructions to emit.
MAX_INST_NUM = 5000
# Per-instruction caps for the "ap", "qc", "ag", "qlc" and "sim" instructions.
MAX_AP_NUM, MAX_QC_NUM, MAX_AG_NUM = 2500, 100, 20
MAX_QLC_NUM, MAX_SIM_NUM = 20, 500

# Inclusive [L, R] ranges for the randomly generated integer fields.
AGE_L, AGE_R = 0, 200
VALUE_L, VALUE_R = 0, 1000
EMOJIID_L, EMOJIID_R = 0, 10000
SOCIALVALUE_L, SOCIALVALUE_R = -1000, 1000
MONEY_L, MONEY_R = 0, 200
ID_L, ID_R = -100, 100

# Lengths of the random strings: person name ("ap") and notice text ("anm").
MAX_NAME_LINE = 10
MAX_STRING_LINE = 100
def getRN(a, b):
    """Return a random integer N with a <= N <= b (both bounds inclusive)."""
    upper_exclusive = b + 1
    return random.randrange(a, upper_exclusive)
def getRS(num):
    """Return a random string made of ``num`` ASCII letters (either case)."""
    alphabet = string.ascii_letters
    picked = [random.choice(alphabet) for _ in range(num)]
    return ''.join(picked)
def genData():
    """Write a random instruction sequence to ``input.txt`` inside ``test_dir``.

    The file starts with two newline characters (``print("\\n")``), followed by
    exactly ``MAX_INST_NUM`` instruction lines. Each line is chosen uniformly
    from 26 instruction kinds; the kinds "ap", "qc", "ag", "qlc" and "sim" stop
    being emitted once their ``MAX_*_NUM`` cap is reached (the draw is simply
    repeated). A per-kind summary is printed to stdout when done.

    Side effects: changes the process working directory to ``test_dir`` and
    overwrites ``input.txt`` there.
    """

    def rand_id():
        return getRN(ID_L, ID_R)

    def rand_flag():
        return getRN(0, 1)

    def rand_name():
        return getRS(MAX_NAME_LINE)

    def rand_age():
        return getRN(AGE_L, AGE_R)

    def rand_value():
        return getRN(VALUE_L, VALUE_R)

    def rand_social_value():
        return getRN(SOCIALVALUE_L, SOCIALVALUE_R)

    def rand_money():
        return getRN(MONEY_L, MONEY_R)

    def rand_notice():
        return getRS(MAX_STRING_LINE)

    def rand_emoji():
        return getRN(EMOJIID_L, EMOJIID_R)

    def rand_emoji_limit():
        return getRN(0, EMOJIID_R)

    # op code -> (mnemonic, argument generators). Generators are called left to
    # right, so the sequence of random draws matches the field order on the line.
    instructions = {
        1: ("ap", (rand_id, rand_name, rand_age)),
        2: ("ar", (rand_id, rand_id, rand_value)),
        3: ("qv", (rand_id, rand_id)),
        4: ("qps", ()),
        5: ("qc", (rand_id, rand_id)),
        6: ("qbs", ()),
        7: ("ag", (rand_id,)),
        8: ("atg", (rand_id, rand_id)),
        9: ("dfg", (rand_id, rand_id)),
        10: ("qgps", (rand_id,)),
        11: ("qgvs", (rand_id,)),
        12: ("qgav", (rand_id,)),
        13: ("am", (rand_id, rand_social_value, rand_flag, rand_id, rand_id)),
        14: ("sm", (rand_id,)),
        15: ("qsv", (rand_id,)),
        16: ("qrm", (rand_id,)),
        17: ("qlc", (rand_id,)),
        18: ("arem", (rand_id, rand_money, rand_flag, rand_id, rand_id)),
        19: ("anm", (rand_id, rand_notice, rand_flag, rand_id, rand_id)),
        20: ("cn", (rand_id,)),
        21: ("aem", (rand_id, rand_emoji, rand_flag, rand_id, rand_id)),
        22: ("sei", (rand_id,)),
        23: ("qp", (rand_id,)),
        24: ("dce", (rand_emoji_limit,)),
        25: ("qm", (rand_id,)),
        26: ("sim", (rand_id,)),
    }

    # Instructions whose total count is limited, and how many were emitted so far.
    caps = {
        "ap": MAX_AP_NUM,
        "qc": MAX_QC_NUM,
        "ag": MAX_AG_NUM,
        "qlc": MAX_QLC_NUM,
        "sim": MAX_SIM_NUM,
    }
    counts = dict.fromkeys(caps, 0)

    os.chdir(test_dir)
    inst_num = 0
    # The context manager guarantees the file is flushed and closed even if
    # generation raises part-way through.
    with open("input.txt", "w") as data:
        print("\n", file=data)
        while inst_num < MAX_INST_NUM:
            name, arg_gens = instructions[getRN(1, 26)]
            if name in caps:
                if counts[name] >= caps[name]:
                    # Cap reached: emit nothing for this draw and try again.
                    continue
                counts[name] += 1
            fields = [name]
            fields.extend(str(gen()) for gen in arg_gens)
            print(" ".join(fields), file=data)
            inst_num += 1

    print("inst_num: " + str(inst_num))
    for name in ("ap", "qc", "ag", "qlc", "sim"):
        print(name + "_num: " + str(counts[name]))
if __name__ == "__main__":
    # Generate data only when run as a script (e.g. `python genData.py`),
    # so importing this module does not change directory or write files.
    genData()
当然这种随机数据生成只能满足一定的覆盖率要求,还需要生成特殊数据来进行针对性测试,比如对于求最短路的指令。还有就是一定要仔细阅读代码中给出的JML规格,JML规格中有很多反直觉的地方,比如群组人数 1111 的上限,getReceivedMessages 是返回最近四条等。
老师很推荐JUNIT单元测试,但我学习了一下之后发现针对UNIT3,JUNIT的效率还是有些低了。这个就相当于写testbench,对于硬件这种测试方式是很好的,因为硬件涉及时序,逻辑复杂度本身就很容易很高。 但是对于软件,对于自己很了解的模块,感觉不需要写JUNIT,写了也容易犯同样的逻辑错误。
自动化对拍
使用powershell脚本进行自动化对拍。个人感觉powershell是目前最友好的终端。它的编程语法与C很像,不像bash那样繁琐,同时全面兼容windows环境。同时,powershell还以命令别名的方式支持了大部分常用的bash命令。
对拍逻辑为生成一组数据,重定向至所有DUT中,收集输出并进行笛卡尔积比对。最后将DUT运行时间与输出写入日志文件中。该脚本支持特定数据比对与随机数据生成比对。
# Usage:
#   .\AutoTest.ps1 sp <testcasename>   run every DUT on one given test case and compare
#   .\AutoTest.ps1                     generate random data and compare the DUTs on it
#                                      (the number of rounds is set by the while loop
#                                      in the random-data branch of this script)
Write-Output "Autotest run..."
# Base names of the programs under test: <name>.jar is run, <name>.txt receives its output.
$names = "wyj", "ghy"
# Pipe the test case file $case into <name>.jar, store its stdout in <name>.txt,
# and emit begin/end markers plus the timing object produced by Measure-Command.
function run_test {
    param($name, $case)
    Write-Output "$name begin run"
    Measure-Command { Get-Content $case | java -jar "$name.jar" > "$name.txt" }
    Write-Output "$name end run"
}
# Compare the saved outputs <name1>.txt and <name2>.txt line by line;
# any differing lines are emitted after a "diff ..." header.
function run_cmp {
    param($name1, $name2)
    Write-Output "diff $name1 $name2"
    $left = Get-Content "$name1.txt"
    $right = Get-Content "$name2.txt"
    Compare-Object $left $right
}
if ($args[0] -eq "sp") {
    # Specific-case mode: $args[1] names the test case file to replay.
    $case = $args[1]
    echo " " > error.txt
    # Use a plain variable inside the string: a bare `test$args[1]` expands the
    # whole $args array and then appends a literal "[1]".
    echo "test$case"
    echo "test$case" > .\error.txt
    # Run and compare every unordered pair of DUTs; everything is logged to error.txt.
    for ($i = 0; $i -lt $names.Length; $i = $i + 1) {
        for ($j = 0; $j -lt $i; $j = $j + 1) {
            run_test $names[$i] $case >> .\error.txt
            run_test $names[$j] $case >> .\error.txt
            run_cmp $names[$i] $names[$j] >> .\error.txt
        }
    }
} else {
    # Random-data mode: generate input.txt with genData.py, archive it, then compare DUTs.
    echo "" > error.txt
    # The archive copy below fails if the target directory does not exist yet.
    if (-not (Test-Path "testcase")) {
        New-Item -ItemType Directory -Path "testcase" | Out-Null
    }
    $a = 0
    while ($a -ne 1) {
        python .\genData.py
        cp "input.txt" "testcase\input$a.txt"
        sleep 1
        echo "test$a"
        echo "test$a" >> .\error.txt
        for ($i = 0; $i -lt $names.Length; $i = $i + 1) {
            for ($j = 0; $j -lt $i; $j = $j + 1) {
                run_test $names[$i] "input.txt" >> .\error.txt
                run_test $names[$j] "input.txt" >> .\error.txt
                run_cmp $names[$i] $names[$j] >> .\error.txt
            }
        }
        echo "test $a done"
        echo "test $a done" >> .\error.txt
        $a = $a + 1
    }
    echo "test done"
}
架构设计与图算法
按要求实现所有的类与函数。对于图相关函数,另外建立工具类存储。
对于JML中声明为集合的元素,基本都使用了HashMap存储,方便查找。
// Id-keyed lookup tables that back the collections declared in the JML spec.
private final HashMap<Integer, Person> idToPeople; // this is "people"
private final HashMap<Integer, Group> idToGroup; // this is "groups"
// NOTE(review): presumably message id -> message, and emoji id -> heat value
// (inferred from the field names only) — confirm against the users of these maps.
private final HashMap<Integer, Message> idToMessage;
private final HashMap<Integer, Integer> idToEmojiHeat;
图算法
图存储
socialNet为一个有权无向图。人是节点,人与人的关系就是边。
我采用了嵌套HashMap模拟二维数组来存储图:
// Adjacency map of the weighted undirected social graph: graph.get(a) maps each
// neighbour id of person a to the value (weight) of the relation between them.
// Only existing relations are stored.
private final HashMap<Integer, HashMap<Integer, Integer>> graph;
这样图算法会比较好写,同时由于hash表中只存储有效的链接,因此克服了二维数组对于稀疏图的时间复杂度高的缺点。
图算法
求最短路的dijkstra算法采用了java自带的优先队列优化:
/**
 * Computes the length of the shortest path between two persons with Dijkstra's
 * algorithm on {@code graph}, using a {@link PriorityQueue} of {@code Item}s.
 * <p>
 * Assumes edge values are non-negative and that {@code Item} orders itself by
 * its value (smallest first) — both are required for the result to be a
 * shortest distance. The search stops as soon as {@code toId} is settled.
 *
 * @param fromId id of the start person
 * @param toId   id of the target person
 * @return the minimal sum of edge values on a path from {@code fromId} to {@code toId}
 *         (0 when both ids are equal)
 * @throws IllegalArgumentException if {@code toId} cannot be reached from {@code fromId}
 */
public static int dijkstra(int fromId, int toId) {
    // Nodes whose shortest distance is final.
    HashSet<Integer> collected = new HashSet<>();
    // Best distance found so far for every discovered node.
    HashMap<Integer, Integer> idToDist = new HashMap<>();
    PriorityQueue<Item> priorityQueue = new PriorityQueue<>();

    idToDist.put(fromId, 0);
    priorityQueue.add(new Item(fromId, 0));

    while (!priorityQueue.isEmpty()) {
        Item min = priorityQueue.remove();
        int minId = min.getId();
        int minDist = min.getValue();
        // The queue may hold outdated entries for a node; only the first
        // (smallest) one is processed.
        if (!collected.add(minId)) {
            continue;
        }
        if (minId == toId) {
            // With non-negative weights the first time a node is popped its
            // distance is final, so there is no need to explore further.
            return minDist;
        }
        HashMap<Integer, Integer> line = graph.get(minId);
        if (line == null) {
            // Node without an adjacency entry: nothing to relax.
            continue;
        }
        for (int id : line.keySet()) {
            int candidate = minDist + line.get(id);
            Integer known = idToDist.get(id);
            if (known == null || candidate < known) {
                idToDist.put(id, candidate);
                priorityQueue.add(new Item(id, candidate));
            }
        }
    }
    // Previously this case surfaced as a NullPointerException while unboxing.
    throw new IllegalArgumentException("no path from " + fromId + " to " + toId);
}
qlc指令采用prim最小生成树算法优化。总复杂度上界为 $O(N^2)$;但由于使用嵌套Hash表存储图,只需遍历实际存在的边,因此实际复杂度远小于 $O(N^2)$。
/**
 * Computes the total weight of a minimum spanning tree of the connected
 * component containing {@code id}, using Prim's algorithm with a linear scan
 * for the cheapest frontier edge.
 *
 * @param id id of any person in the component
 * @return sum of the edge values of the spanning tree, or 0 if {@code id}
 *         has no entry in {@code graph}
 */
private int prim(int id) {
    HashMap<Integer, Integer> startEdges = graph.get(id);
    if (startEdges == null) {
        return 0;
    }
    int sum = 0;
    // Nodes already connected to the tree.
    HashSet<Integer> inGraph = new HashSet<>();
    inGraph.add(id);
    // Frontier: for each node adjacent to the tree, the cheapest edge reaching it.
    HashMap<Integer, Integer> dist = new HashMap<>(startEdges);
    // A self-loop can never be a tree edge.
    dist.remove(id);

    while (!dist.isEmpty()) {
        // Pick the cheapest frontier node. Seeding from the first candidate
        // (instead of a fixed sentinel weight) keeps this correct for any
        // weight range and guarantees the frontier shrinks every iteration.
        Integer minId = null;
        int minDist = 0;
        for (Integer candidate : dist.keySet()) {
            int weight = dist.get(candidate);
            if (minId == null || weight < minDist) {
                minDist = weight;
                minId = candidate;
            }
        }
        sum += minDist;
        dist.remove(minId);
        inGraph.add(minId);

        HashMap<Integer, Integer> minIdDist = graph.get(minId);
        if (minIdDist == null) {
            // Node has no adjacency entry of its own: nothing to expand.
            continue;
        }
        for (Integer next : minIdDist.keySet()) {
            if (inGraph.contains(next)) {
                continue;
            }
            int weight = minIdDist.get(next);
            Integer known = dist.get(next);
            if (known == null || weight < known) {
                dist.put(next, weight);
            }
        }
    }
    return sum;
}
同时,使用了并查集来索引所有的群组。并查集实现了路径压缩。并查集的查询使用了递归实现,代码很简洁。
// Union-find (disjoint set) parent table: maps a node id to its parent id;
// a root maps to itself. Entries are created lazily by find().
private final HashMap<Integer, Integer> andCheck;
/**
 * Returns the representative (root) of the set containing {@code i},
 * registering {@code i} as its own root on first sight and re-pointing every
 * node visited on the way directly at the root (path compression).
 */
private int find(int i) {
    Integer parent = andCheck.get(i);
    if (parent == null) {
        // Unknown id: it starts as a singleton set.
        andCheck.put(i, i);
        return i;
    }
    if (parent == i) {
        return i;
    }
    int root = find(parent);
    andCheck.put(i, root);
    return root;
}
/**
 * Merges the sets containing {@code x} and {@code y}. When they were separate,
 * the set counter {@code setNum} is decremented and the root of {@code x}
 * is attached under the root of {@code y}.
 */
private void join(int x, int y) {
    int rootX = find(x);
    int rootY = find(y);
    if (rootX == rootY) {
        // Already in the same set; the root already points at itself.
        return;
    }
    setNum--;
    andCheck.put(rootX, rootY);
}
性能优化
getReceivedMessages
getReceivedMessages 指令要求返回最近四条消息,JML中对于消息的添加也是加到集合的最前面。这要求消息集合要维护有序性。但是如果直接按照JML的方式,用ArrayList存储消息并将新来的消息加到最前面,则每次添加消息都需要移动整个集合,时间复杂度很高。我采用的优化方法是,使用ArrayList模拟消息队列,新来的消息加到队列的尾部,查询时返回队列尾部的四条消息。
query_group_value_sum
如果每次查询都直接计算值的和,则复杂度会很高。我采用的优化方式是维护一个valueSum值,当有新的边加入或有边被移除时都修改这个值。这样一来这个指令就是$O(1)$的了。
并查集
使用并查集维护图中的连通块,并使用路径压缩优化。query_circle 与 query_block_sum 两个指令的时间复杂度近似为$O(1)$。
bug分析
三次作业均未查出bug。
Network 扩展
假设出现了几种不同的Person
- Advertiser:持续向外发送产品广告
- Producer:产品生产商,通过Advertiser来销售产品
- Customer:消费者,会关注广告并选择和自己偏好匹配的产品来购买 -- 所谓购买,就是直接通过Advertiser给相应Producer发一个购买消息
- Person:吃瓜群众,不发广告,不买东西,不卖东西
如此Network可以支持市场营销,并能查询某种商品的销售额和销售路径等。请讨论如何对Network扩展,给出相关接口方法,并选择3个核心业务功能的接口方法撰写JML规格(借鉴所总结的JML规格模式)
实现方法
Advertiser、Producer 和 Customer 继承自 Person,Advertisement 和 BuyMessage继承自 Message。
发送广告的JML规格:
/**
 * Sends the advertisement message {@code id}. Per the spec below, every person
 * linked to the message's sender gets the advertisement inserted at the head of
 * their message list, everyone else is left untouched, and the message is
 * removed from the network's message collection.
 */
/*@ public normal_behavior
@ requires containsMessage(id) && (getMessage(id) instanceof Advertisement);
@ assignable messages;
@ assignable people[*].messages;
@ ensures (\forall int i; 0 <= i && i < people.length && \old(getMessage(id)).getPerson1().isLinked(people[i]);
@ (\forall int j; 0 <= j && j < \old(people[i].getMessages().size());
@ people[i].getMessages().get(j+1) == \old(people[i].getMessages().get(j))) &&
@ people[i].getMessages().get(0).equals(\old(getMessage(id))) &&
@ people[i].getMessages().size() == \old(people[i].getMessages().size()) + 1);
@ ensures !containsMessage(id) && messages.length == \old(messages.length) - 1 &&
@ (\forall int i; 0 <= i && i < \old(messages.length) && \old(messages[i].getId()) != id;
@ (\exists int j; 0 <= j && j < messages.length; messages[j].equals(\old(messages[i]))));
@ ensures (\forall int i; 0 <= i && i < people.length && !\old(getMessage(id)).getPerson1().isLinked(people[i]);
@ people[i].getMessages().equals(\old(people[i].getMessages())));
@ also
@ public exceptional_behavior
@ signals (MessageIdNotFoundException e) !containsMessage(id);
@ signals (ClassTypeException e) containsMessage(id) && !(getMessage(id) instanceof Advertisement);
@*/
public void sendAdvertisement(int id) throws
MessageIdNotFoundException, ClassTypeException;
Producer生产产品的JML:
/**
 * Lets the producer {@code producerId} produce one unit of product
 * {@code productId}: per the spec below, that producer's count for the
 * product grows by exactly one.
 */
/*@ public normal_behavior
@ requires contains(producerId) && (getPerson(producerId) instanceof Producer);
@ assignable getProducer(producerId).productCount;
@ ensures getProducer(producerId).getProductCount(productId) ==
@ \old(getProducer(producerId).getProductCount(productId)) + 1;
@ also
@ public exceptional_behavior
@ signals (PersonIdNotFoundException e) !contains(producerId);
@ signals (ClassTypeException e) contains(producerId) && !(getPerson(producerId) instanceof Producer);
@*/
public void produceProduct(int producerId, int productId) throws
PersonIdNotFoundException, ClassTypeException;
询问销售额的JML:
/**
 * Queries the sales amount recorded for product {@code productId}.
 * A pure query: nothing in the network is modified.
 */
/*@ public normal_behavior
@ requires containsProduct(productId);
@ assignable \nothing;
@ ensures \result == productList(productId).getSalesAmount();
@ also
@ public exceptional_behavior
@ signals (ProductNotFoundException e) !containsProduct(productId);
@*/
public int querySaleAmount(int productId) throws ProductNotFoundException;
体会感想
本单元的难度不大,我体会到了基于规格的设计方式,并复习了一些数据结构内容。
在给定的规格下,具体实现可以是多种多样的。
不过,JML有些过于具体细节,而往往对于整体的认识才有助于我们理解架构,进行优化,避免出bug。建议课程组在给出JML的同时对本次作业要完成的设计进行一些功能与架构的文字描述。而不是让我们从JML的字里行间中猜测程序功能。