Online Judge(OJ)搭建——4、具体实现
代码编译、运行、保存:
本系统目前支持 Java、C++ 的编译。如有其他语言需要编译,扩展也很简单,因为这里使用了一个抽象类LanguageTest,处理好代码运行编译之前的文件保存,代码运行之中的测试用例读取,代码运行编译之后的数据保存。主要利用了面向对象的多态性。
package per.piers.onlineJudge.service;

import org.springframework.stereotype.Service;
import per.piers.onlineJudge.Exception.ExistenceException;
import per.piers.onlineJudge.controller.TestController;
import per.piers.onlineJudge.model.InputOutput;
import per.piers.onlineJudge.model.TestInfo;

import java.io.*;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.Scanner;

/**
 * Base class for compiling and running one submission in a given language.
 *
 * <p>The class saves the submitted code to a per-submission directory, runs the
 * compile command, then runs the execute command once per test case and compares
 * the captured output with the expected output. Subclasses only supply the
 * compile command, the execute command and the source file name.
 */
@Service
public abstract class LanguageTest {

    /** Classpath location of the properties file that holds the working directory ("path"). */
    private static final String CONFIG_RESOURCE = "config/codeProcessor/codeProcessor.properties";

    /** Charset used for everything exchanged with the child process. */
    private static final String PROCESS_CHARSET = "UTF-8";

    private int uid;
    private int qid;
    private long submitTime;
    protected String code;
    protected String codeDir;
    protected String codeFile;
    private boolean isCompiled = false;
    private List<String> compileCommands;
    private List<String> executeCommands;

    /**
     * Resolves the working directory for this submission and asks the subclass
     * for its commands.
     *
     * @param uid        id of the submitting user
     * @param qid        id of the question
     * @param code       submitted source code
     * @param submitTime submission time in milliseconds
     * @throws UncheckedIOException  if the configuration resource is missing or unreadable
     * @throws IllegalStateException if the configuration has no "path" entry
     */
    protected LanguageTest(int uid, int qid, String code, long submitTime) {
        this.uid = uid;
        this.qid = qid;
        this.code = code;
        this.submitTime = submitTime;

        Properties properties = new Properties();
        try (InputStream inputStream =
                     TestController.class.getClassLoader().getResourceAsStream(CONFIG_RESOURCE)) {
            if (inputStream == null) {
                throw new FileNotFoundException("classpath resource not found: " + CONFIG_RESOURCE);
            }
            properties.load(inputStream);
        } catch (IOException e) {
            // Continuing without a directory would only fail later with a less useful error.
            throw new UncheckedIOException("cannot load " + CONFIG_RESOURCE, e);
        }

        String tmpDir = properties.getProperty("path");
        if (tmpDir == null) {
            throw new IllegalStateException("property \"path\" is missing in " + CONFIG_RESOURCE);
        }
        this.codeDir = String.format("%s/%s/%s/%s/", tmpDir, uid, qid, submitTime);
        this.codeFile = String.format("%s/%s", codeDir, getCodeFileName());

        // The subclass hooks read codeDir, so they must run only after it has been assigned.
        this.compileCommands = getCompileCommands();
        this.executeCommands = getExecuteCommands();
    }

    /** @return the command line that compiles the saved source file */
    protected abstract List<String> getCompileCommands();

    /** @return the command line that runs the compiled program */
    protected abstract List<String> getExecuteCommands();

    /** @return the file name under which the submitted code is saved */
    protected abstract String getCodeFileName();

    /**
     * Saves the submitted code and runs the compile command.
     *
     * @return everything the compile command printed (stdout and stderr merged),
     *         or {@code null} if it printed nothing
     * @throws IOException        if the file cannot be written or the process cannot be started
     * @throws ExistenceException if the source file already exists
     */
    public String compile() throws IOException {
        File sourceFile = new File(this.codeFile);
        if (sourceFile.exists()) {
            throw new ExistenceException("temp code file");
        }
        sourceFile.getParentFile().mkdirs();
        sourceFile.createNewFile();
        try (FileWriter writer = new FileWriter(sourceFile)) {
            writer.write(code);
            writer.flush();
        }

        // TODO: Docker permission control
        ProcessBuilder processBuilder = new ProcessBuilder(compileCommands);
        processBuilder.directory(new File(codeDir));
        processBuilder.redirectErrorStream(true);
        Process process = processBuilder.start();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(), PROCESS_CHARSET))) {
            StringBuilder output = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                output.append(line).append('\n');
            }
            isCompiled = true;
            return output.length() == 0 ? null : output.toString();
        }
    }

    /**
     * Runs the compiled program against every test case.
     *
     * @param inputOutputs test cases (input plus expected output)
     * @return the submission record with per-case results and the ratio of
     *         correct cases; the ratio is 0 when there are no test cases
     * @throws IllegalStateException if {@link #compile()} has not been called
     * @throws IOException           if a test process cannot be started or talked to
     */
    public TestInfo execute(ArrayList<InputOutput> inputOutputs) throws IOException {
        if (!isCompiled) {
            throw new IllegalStateException("not compiled");
        }
        int correct = 0;
        ArrayList<InputOutput> results = new ArrayList<>();
        // test all test cases
        for (InputOutput inputOutput : inputOutputs) {
            String output = test(inputOutput.getInput());
            boolean matches = output.equals(inputOutput.getOutput());
            if (matches) {
                correct++;
            }
            InputOutput actualInputOutput = new InputOutput();
            actualInputOutput.setInput(inputOutput.getInput());
            actualInputOutput.setOutput(output);
            actualInputOutput.setCorrect(matches);
            results.add(actualInputOutput);
        }
        // Avoid 0/0 = NaN when a question has no test cases.
        double ratio = inputOutputs.isEmpty() ? 0.0 : (double) correct / (double) inputOutputs.size();
        TestInfo testInfo = new TestInfo(uid, qid, new Timestamp(submitTime), code, ratio);
        testInfo.setInputOutputs(results);
        return testInfo;
    }

    /**
     * Runs the program once, feeding {@code input} to its standard input.
     *
     * @param input text written to the program's standard input as UTF-8
     * @return the program's output (stdout and stderr merged) with all lines
     *         concatenated and the line terminators dropped
     * @throws IOException if the process cannot be started or talked to
     */
    protected String test(String input) throws IOException {
        ProcessBuilder processBuilder = new ProcessBuilder(executeCommands);
        processBuilder.directory(new File(codeDir));
        processBuilder.redirectErrorStream(true);
        Process process = processBuilder.start();
        // Closing stdin signals end-of-input to the program.
        try (OutputStream outputStream = process.getOutputStream()) {
            outputStream.write(input.getBytes(PROCESS_CHARSET));
            outputStream.flush();
        }
        StringBuilder results = new StringBuilder();
        // Decode with the same charset the input was encoded with, not the platform default.
        try (Scanner in = new Scanner(process.getInputStream(), PROCESS_CHARSET)) {
            while (in.hasNextLine()) {
                results.append(in.nextLine());
            }
        }
        return results.toString();
    }
}
在子类中,只需要设置一些参数即可扩展,比如Docker编译Java的命令、Docker运行Java的命令、代码文件名。
package per.piers.onlineJudge.service;

import java.util.ArrayList;
import java.util.List;

/**
 * Java flavour of {@link LanguageTest}: compiles and runs {@code Main.java}
 * inside an {@code openjdk:8} Docker container.
 */
public class JavaTest extends LanguageTest {

    public JavaTest(int uid, int qid, String code, long submitTime) {
        super(uid, qid, code, submitTime);
    }

    /** @return the docker command that runs {@code javac Main.java} in the code directory */
    @Override
    protected List<String> getCompileCommands() {
        String volume = String.format("%s:%s", codeDir, codeDir);
        String script = String.format("cd %s&&javac Main.java", codeDir);
        return asCommand(
                "docker", "run", "--rm",
                "-u", "root",
                "-v", volume,
                "openjdk:8",
                "/bin/sh", "-c", script);
    }

    /** @return the docker command that runs {@code java Main} under a 3 second timeout */
    @Override
    protected List<String> getExecuteCommands() {
        String volume = String.format("%s:%s", codeDir, codeDir);
        String script = String.format("cd %s&&timeout 3s java Main", codeDir);
        return asCommand(
                "docker", "run", "-i", "--rm",
                "-u", "root",
                "-v", volume,
                "openjdk:8",
                "/bin/sh", "-c", script);
    }

    @Override
    protected String getCodeFileName() {
        return "Main.java";
    }

    /** Copies the given arguments, in order, into a new mutable list. */
    private static List<String> asCommand(String... arguments) {
        ArrayList<String> command = new ArrayList<>();
        for (String argument : arguments) {
            command.add(argument);
        }
        return command;
    }
}
package per.piers.onlineJudge.service;

import per.piers.onlineJudge.model.InputOutput;
import per.piers.onlineJudge.model.TestInfo;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * C++ flavour of {@link LanguageTest}: compiles {@code Main.cpp} with g++ and
 * runs the resulting {@code a.out} inside a {@code gcc:7} Docker container.
 */
public class CppTest extends LanguageTest {

    public CppTest(int uid, int qid, String code, long submitTime) {
        super(uid, qid, code, submitTime);
    }

    /** @return the docker command that runs {@code g++ Main.cpp} in the code directory */
    @Override
    protected List<String> getCompileCommands() {
        String volume = String.format("%s:%s", codeDir, codeDir);
        String script = String.format("cd %s&&g++ Main.cpp", codeDir);
        return asCommand(
                "docker", "run", "--rm",
                "-u", "root",
                "-v", volume,
                "gcc:7",
                "/bin/sh", "-c", script);
    }

    /** @return the docker command that runs {@code ./a.out} under a 3 second timeout */
    @Override
    protected List<String> getExecuteCommands() {
        String volume = String.format("%s:%s", codeDir, codeDir);
        String script = String.format("cd %s&&timeout 3s ./a.out", codeDir);
        return asCommand(
                "docker", "run", "--rm", "-i",
                "-u", "root",
                "-v", volume,
                "gcc:7",
                "/bin/sh", "-c", script);
    }

    @Override
    protected String getCodeFileName() {
        return "Main.cpp";
    }

    /** Copies the given arguments, in order, into a new mutable list. */
    private static List<String> asCommand(String... arguments) {
        ArrayList<String> command = new ArrayList<>();
        for (String argument : arguments) {
            command.add(argument);
        }
        return command;
    }
}
这里利用 Docker 进行代码编译。Docker 是一个虚拟容器,放在 Docker 中运行的程序不会影响操作系统,也不会影响 Docker 容器中其他的程序。恶意代码在 Docker 中被执行,容器只会被破坏,不会有别的影响,此时只需重启容器即可。
Docker 编译 Java 命令:docker run --rm -u root -v /onlineJudge:/onlineJudge openjdk:8 /bin/sh -c "cd /onlineJudge&&javac Main.java"
其中,--rm 表示容器退出后自动删除容器,-u root 表示以 root 身份运行(此 root 是容器内的 root,不等于操作系统中的 root,权限低了很多),-v /onlineJudge:/onlineJudge 是挂载卷,即存放代码的位置,openjdk:8 是镜像名和版本,/bin/sh -c "cd /onlineJudge&&javac Main.java" 是容器启动之后运行的命令:利用 shell 进入 /onlineJudge 文件夹并执行 javac Main.java。&& 表示前一条命令执行成功后才执行后一条命令。
Docker 运行 Java 命令:docker run --rm -i -u root -v /onlineJudge:/onlineJudge openjdk:8 /bin/sh -c "cd /onlineJudge&&timeout 3s java Main"
其中,-i 表示保持容器的标准输入打开,这样测试用例才能通过标准输入传给容器中的程序。timeout 是 Linux 的限时命令,这里限制程序最多运行 3 秒。
Docker 编译 C++ 命令:docker run --rm -u root -v /onlineJudge:/onlineJudge gcc:7 /bin/sh -c "cd /onlineJudge&&g++ Main.cpp"
Docker 运行 C++ 命令:docker run --rm -i -u root -v /onlineJudge:/onlineJudge gcc:7 /bin/sh -c "cd /onlineJudge&&timeout 3s ./a.out"
Token 生成:
token 是在用户注册或者忘记密码时生成的。在用户注册或者忘记密码时,要给用户一个根据一定条件生成的 token,这样黑客就无法利用 URL 进行信息窃取和破坏。比如,如果用户 Piers 忘记密码的链接不是用 token 生成的,那么黑客就可以访问特定的 URL 对 Piers 的信息进行篡改(形如 http://yourWebsite.com/password/Piers);而生成的 token 可以防止这一点,URL 是随机的(形如 http://yourWebsite.com/password/1042637985、http://yourWebsite.com/password/3798510426),黑客除非黑进用户的邮箱,否则很难得知用户忘记密码的链接。此外,token 还有时间限制,过期的 token 会从服务器中删除。
这里 token 的算法比较简单:token = 系统时间字符串 + 三位后缀,其中后缀 = (用户 email 的每个字符的 ASCII 值乘以一个 0~9 的随机数,再求和) % 100,不足三位时在前面补 0。本系统流量较小,出现 token 重复的概率很低。token 保存在 ConcurrentHashMap 中,防止由于多线程带来的异常。
其实更先进的 token 应该是用非对称加密的形式生成。
package per.piers.onlineJudge.util;

import java.security.SecureRandom;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;

/**
 * Issues and resolves short-lived URL tokens (registration / password reset).
 *
 * <p>A token is the creation time in milliseconds followed by a zero-padded
 * three-character suffix. Tokens are kept in memory and expire after
 * {@link #TIMEOUT} milliseconds.
 *
 * <p>NOTE(review): a timestamp plus a two-digit random suffix is easy to guess;
 * a token with real entropy (or a signed token) would be a stronger design.
 */
public class TokenUtil {

    /** Token lifetime: five minutes, in milliseconds. */
    private static final long TIMEOUT = 1000 * 60 * 5;

    /** Number of trailing characters that are not part of the creation time. */
    private static final int SUFFIX_LENGTH = 3;

    /** Upper bound on suffix re-draws when a generated token is already taken. */
    private static final int MAX_ATTEMPTS = 1000;

    private static final Random RANDOM = new SecureRandom();

    private static final ConcurrentHashMap<String, String> tokenEmails = new ConcurrentHashMap<>();

    /**
     * Creates a token for {@code email} and remembers the association.
     *
     * @param time  creation time in milliseconds; becomes the token's prefix
     * @param email address the token stands for
     * @return the new token
     * @throws NullPointerException  if {@code email} is null
     * @throws IllegalStateException if no unused token could be found for {@code time}
     */
    public static synchronized String addURLToken(long time, String email) {
        if (email == null) {
            throw new NullPointerException("email");
        }
        char[] emailCharacters = email.toCharArray();
        for (int attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
            int emailSum = 0;
            for (char c : emailCharacters) {
                emailSum += ((int) c) * RANDOM.nextInt(10);
            }
            String key = String.format("%d%03d", time, emailSum % 100);
            // Never overwrite a live token: that would silently re-point another
            // user's link at this email address.
            String owner = tokenEmails.putIfAbsent(key, email);
            if (owner == null || owner.equals(email)) {
                return key;
            }
        }
        throw new IllegalStateException("no free token for time " + time);
    }

    /**
     * Looks up the email a token was issued for, dropping expired tokens first.
     *
     * @param token token previously returned by {@link #addURLToken(long, String)}
     * @return the associated email, or {@code null} if the token is null, unknown or expired
     * @throws IllegalStateException if a stored token claims to be created in the future
     */
    public static synchronized String getEmailFromToken(String token) {
        if (token == null) {
            return null;
        }
        long now = System.currentTimeMillis();
        for (String storedToken : tokenEmails.keySet()) {
            // Slice each stored key by its OWN length, not by the queried token's length.
            long create = createdAt(storedToken);
            if (now < create) {
                throw new IllegalStateException("now < create");
            }
            if (now - create >= TIMEOUT) {
                tokenEmails.remove(storedToken);
            }
        }
        // Everything still in the map is unexpired, so a plain lookup is enough.
        return tokenEmails.get(token);
    }

    /** Extracts the creation time from a token produced by this class. */
    private static long createdAt(String storedToken) {
        return Long.parseLong(storedToken.substring(0, storedToken.length() - SUFFIX_LENGTH));
    }
}
邮件发送:
邮件发送采用 javax.mail 包。首先设置邮件的域名、用户名、密码,再设置邮件的内容,包括主题、发件人等,最后发送邮件。
package per.piers.onlineJudge.util;

import javax.mail.*;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage;
import java.io.IOException;
import java.io.InputStream;
import java.security.Security;
import java.util.Date;
import java.util.Properties;

/**
 * Sends plain-text emails using the account configured in
 * {@code config/mail/mail.properties} on the classpath.
 */
public class MailUtil {

    /** Classpath location of the mail configuration. */
    private static final String CONFIG_RESOURCE = "config/mail/mail.properties";

    private MailUtil() {
    }

    /**
     * Sends one plain-text message.
     *
     * @param email   recipient address(es), parsed with {@link InternetAddress#parse(String, boolean)}
     * @param subject message subject
     * @param content message body
     * @throws MessagingException if the mail configuration cannot be loaded or the message cannot be sent
     */
    public static void sendEmail(String email, String subject, String content) throws MessagingException {
        Security.addProvider(new com.sun.net.ssl.internal.ssl.Provider());

        final Properties properties = loadMailProperties();
        final String username = properties.getProperty("mail.username");
        final String password = properties.getProperty("mail.password");
        String domain = properties.getProperty("mail.domain");

        Session session = Session.getDefaultInstance(properties, new Authenticator() {
            @Override
            protected PasswordAuthentication getPasswordAuthentication() {
                return new PasswordAuthentication(username, password);
            }
        });

        Message msg = new MimeMessage(session);
        msg.setFrom(new InternetAddress(username + "@" + domain));
        msg.setRecipients(Message.RecipientType.TO, InternetAddress.parse(email, false));
        msg.setSubject(subject);
        msg.setText(content);
        msg.setSentDate(new Date());
        Transport.send(msg);
    }

    /**
     * Loads the mail configuration from the classpath.
     *
     * <p>Failing here is deliberate: carrying on with empty properties would
     * send from the address {@code null@null}.
     */
    private static Properties loadMailProperties() throws MessagingException {
        Properties properties = new Properties();
        try (InputStream inputStream = MailUtil.class.getClassLoader().getResourceAsStream(CONFIG_RESOURCE)) {
            if (inputStream == null) {
                throw new MessagingException("mail configuration not found on classpath: " + CONFIG_RESOURCE);
            }
            properties.load(inputStream);
        } catch (IOException e) {
            throw new MessagingException("cannot read mail configuration: " + CONFIG_RESOURCE, e);
        }
        return properties;
    }
}
读取 Excel 文件:
主要是利用 POI 读取 Excel 文件,支持 xls、xlsx 格式。
其操作的顺序基本和 Excel 的结构一致:首先读取 Workbook,其次读取 Sheet,然后读取 Row,最后读取 Row 中的 Cell(单元格)。Cell 的内容可以有很多类型,比如作为 String 读出。
package per.piers.onlineJudge.util;

import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.HashSet;

/**
 * Reads one named column out of the first sheet of an Excel file
 * ({@code xls} or {@code xlsx}) using Apache POI.
 */
public class ExcelUtil {

    /** @return true if the file name ends with "xls" or "xlsx" */
    private boolean isValidExcelFile(File file) {
        return file.getName().endsWith("xls") || file.getName().endsWith("xlsx");
    }

    /**
     * Loads the workbook fully into memory and closes the file afterwards.
     *
     * @return the workbook, or {@code null} if the file name is neither xls nor xlsx
     */
    private Workbook getWorkbook(File file) throws IOException {
        String name = file.getName();
        boolean isXls = name.endsWith("xls");
        boolean isXlsx = name.endsWith("xlsx");
        if (!isXls && !isXlsx) {
            return null;
        }
        // Both constructors read the whole stream, so it can be closed right away.
        try (FileInputStream in = new FileInputStream(file)) {
            if (isXls) {
                // Excel 2003
                return new HSSFWorkbook(in);
            }
            // Excel 2007/2010
            return new XSSFWorkbook(in);
        }
    }

    /**
     * Collects the distinct string values of the column whose header (row 0)
     * equals {@code columnName}.
     *
     * @param excelFile  xls or xlsx file to read
     * @param columnName header text of the wanted column
     * @return the distinct cell values below the header, or {@code null} if the
     *         sheet has no header row or no header equals {@code columnName}
     * @throws IllegalArgumentException if the file is not an Excel file by name
     * @throws IOException              if the file cannot be read
     */
    public HashSet<String> readColumns(File excelFile, String columnName) throws IOException {
        if (!isValidExcelFile(excelFile)) {
            throw new IllegalArgumentException("not a excel file");
        }
        Workbook workbook = getWorkbook(excelFile);
        Sheet sheet = workbook.getSheetAt(0);
        Row row0 = sheet.getRow(0);
        if (row0 == null) {
            return null;
        }

        // getLastCellNum() is the last cell index plus one; unlike the physical
        // cell count it still covers every column when some header cells are missing.
        int index = -1;
        for (int i = 0; i < row0.getLastCellNum(); i++) {
            String header = stringValue(row0, i);
            if (header != null && header.equals(columnName)) {
                index = i;
                break;
            }
        }
        if (index == -1) {
            return null;
        }

        HashSet<String> columns = new HashSet<>(sheet.getPhysicalNumberOfRows());
        // getLastRowNum() is the index of the last row, so blank rows in the
        // middle of the sheet neither end the loop early nor cause a null access.
        for (int i = 1; i <= sheet.getLastRowNum(); i++) {
            Row row = sheet.getRow(i);
            if (row == null) {
                continue;
            }
            String value = stringValue(row, index);
            if (value != null) {
                columns.add(value);
            }
        }
        return columns;
    }

    /** @return the string value of the cell at {@code column}, or {@code null} if the cell does not exist */
    private static String stringValue(Row row, int column) {
        return row.getCell(column) == null ? null : row.getCell(column).getStringCellValue();
    }
}
抄袭作弊检测:
主要是利用了 K-means,K-means 具体原理网上有很多,这里就不多讲了。
具体实现选用的是 WEKA。WEKA 需要修改数据源,在 weka.jar/weka/experiment/DatabaseUtils.props 配置 MySQL 数据库连接:
# Database settings for MySQL 3.23.x, 4.x # # General information on database access can be found here: # http://weka.wikispaces.com/Databases # # url: http://www.mysql.com/ # jdbc: http://www.mysql.com/products/connector/j/ # author: Fracpete (fracpete at waikato dot ac dot nz) # version: $Revision: 11885 $ # JDBC driver (comma-separated list) jdbcDriver=com.mysql.cj.jdbc.Driver # database URL jdbcURL=jdbc:mysql://localhost:3306/online_judge?serverTimezone=UTC&useUnicode=true&characterEncoding=UTF-8&useSSL=true # specific data types string, getString() = 0; --> nominal boolean, getBoolean() = 1; --> nominal double, getDouble() = 2; --> numeric byte, getByte() = 3; --> numeric short, getByte()= 4; --> numeric int, getInteger() = 5; --> numeric long, getLong() = 6; --> numeric float, getFloat() = 7; --> numeric date, getDate() = 8; --> date text, getString() = 9; --> string time, getTime() = 10; --> date timestamp, getTime() = 11; --> date # other options CREATE_DOUBLE=DOUBLE CREATE_STRING=TEXT CREATE_INT=INT CREATE_DATE=DATETIME DateFormat=yyyy-MM-dd HH:mm:ss checkUpperCaseNames=false checkLowerCaseNames=false checkForTable=true # All the reserved keywords for this database # Based on the keywords listed at the following URL (2009-04-13): # http://dev.mysql.com/doc/mysqld-version-reference/en/mysqld-version-reference-reservedwords-5-0.html Keywords=\ ADD,\ ALL,\ ALTER,\ ANALYZE,\ AND,\ AS,\ ASC,\ ASENSITIVE,\ BEFORE,\ BETWEEN,\ BIGINT,\ BINARY,\ BLOB,\ BOTH,\ BY,\ CALL,\ CASCADE,\ CASE,\ CHANGE,\ CHAR,\ CHARACTER,\ CHECK,\ COLLATE,\ COLUMN,\ COLUMNS,\ CONDITION,\ CONNECTION,\ CONSTRAINT,\ CONTINUE,\ CONVERT,\ CREATE,\ CROSS,\ CURRENT_DATE,\ CURRENT_TIME,\ CURRENT_TIMESTAMP,\ CURRENT_USER,\ CURSOR,\ DATABASE,\ DATABASES,\ DAY_HOUR,\ DAY_MICROSECOND,\ DAY_MINUTE,\ DAY_SECOND,\ DEC,\ DECIMAL,\ DECLARE,\ DEFAULT,\ DELAYED,\ DELETE,\ DESC,\ DESCRIBE,\ DETERMINISTIC,\ DISTINCT,\ DISTINCTROW,\ DIV,\ DOUBLE,\ DROP,\ DUAL,\ EACH,\ ELSE,\ ELSEIF,\ ENCLOSED,\ ESCAPED,\ 
EXISTS,\ EXIT,\ EXPLAIN,\ FALSE,\ FETCH,\ FIELDS,\ FLOAT,\ FLOAT4,\ FLOAT8,\ FOR,\ FORCE,\ FOREIGN,\ FROM,\ FULLTEXT,\ GOTO,\ GRANT,\ GROUP,\ HAVING,\ HIGH_PRIORITY,\ HOUR_MICROSECOND,\ HOUR_MINUTE,\ HOUR_SECOND,\ IF,\ IGNORE,\ IN,\ INDEX,\ INFILE,\ INNER,\ INOUT,\ INSENSITIVE,\ INSERT,\ INT,\ INT1,\ INT2,\ INT3,\ INT4,\ INT8,\ INTEGER,\ INTERVAL,\ INTO,\ IS,\ ITERATE,\ JOIN,\ KEY,\ KEYS,\ KILL,\ LABEL,\ LEADING,\ LEAVE,\ LEFT,\ LIKE,\ LIMIT,\ LINES,\ LOAD,\ LOCALTIME,\ LOCALTIMESTAMP,\ LOCK,\ LONG,\ LONGBLOB,\ LONGTEXT,\ LOOP,\ LOW_PRIORITY,\ MATCH,\ MEDIUMBLOB,\ MEDIUMINT,\ MEDIUMTEXT,\ MIDDLEINT,\ MINUTE_MICROSECOND,\ MINUTE_SECOND,\ MOD,\ MODIFIES,\ NATURAL,\ NOT,\ NO_WRITE_TO_BINLOG,\ NULL,\ NUMERIC,\ ON,\ OPTIMIZE,\ OPTION,\ OPTIONALLY,\ OR,\ ORDER,\ OUT,\ OUTER,\ OUTFILE,\ PRECISION,\ PRIMARY,\ PRIVILEGES,\ PROCEDURE,\ PURGE,\ READ,\ READS,\ REAL,\ REFERENCES,\ REGEXP,\ RELEASE,\ RENAME,\ REPEAT,\ REPLACE,\ REQUIRE,\ RESTRICT,\ RETURN,\ REVOKE,\ RIGHT,\ RLIKE,\ SCHEMA,\ SCHEMAS,\ SECOND_MICROSECOND,\ SELECT,\ SENSITIVE,\ SEPARATOR,\ SET,\ SHOW,\ SMALLINT,\ SONAME,\ SPATIAL,\ SPECIFIC,\ SQL,\ SQLEXCEPTION,\ SQLSTATE,\ SQLWARNING,\ SQL_BIG_RESULT,\ SQL_CALC_FOUND_ROWS,\ SQL_SMALL_RESULT,\ SSL,\ STARTING,\ STRAIGHT_JOIN,\ TABLE,\ TABLES,\ TERMINATED,\ THEN,\ TINYBLOB,\ TINYINT,\ TINYTEXT,\ TO,\ TRAILING,\ TRIGGER,\ TRUE,\ UNDO,\ UNION,\ UNIQUE,\ UNLOCK,\ UNSIGNED,\ UPDATE,\ UPGRADE,\ USAGE,\ USE,\ USING,\ UTC_DATE,\ UTC_TIME,\ UTC_TIMESTAMP,\ VALUES,\ VARBINARY,\ VARCHAR,\ VARCHARACTER,\ VARYING,\ WHEN,\ WHERE,\ WHILE,\ WITH,\ WRITE,\ XOR,\ YEAR_MONTH,\ ZEROFILL # The character to append to attribute names to avoid exceptions due to # clashes between keywords and attribute names KeywordsMaskChar=_ #flags for loading and saving instances using DatabaseLoader/Saver nominalToStringLimit=50 idColumn=auto_generated_id VARCHAR = 0 TEXT = 0
之后根据K-means的流程,设置相关工作条件,执行算法。
package per.piers.onlineJudge.util;

import per.piers.onlineJudge.model.TestInfo;
import weka.clusterers.ClusterEvaluation;
import weka.clusterers.SimpleKMeans;
import weka.core.EuclideanDistance;
import weka.core.Instances;
import weka.experiment.InstanceQuery;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.StringToWordVector;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

/**
 * Groups the submissions of one question with K-means (WEKA) so that
 * suspiciously similar code ends up in the same cluster.
 */
public class FindPlagiarismAlgorithm {

    /** Classpath location of the JDBC credentials. */
    private static final String CONFIG_RESOURCE = "config/mybatis/applications.properties";

    /**
     * Clusters all stored submissions of a question.
     *
     * @param qid       id of the question whose submissions are loaded from the {@code tests} table
     * @param testInfos submission records; element {@code i} is reported for the
     *                  {@code i}-th loaded row (presumably the caller supplies them
     *                  in the same order as the query returns rows — confirm)
     * @return one line per submission naming its user id, submit time and cluster number
     * @throws IllegalArgumentException if {@code testInfos} has fewer entries than rows were loaded
     * @throws Exception                if the configuration, the database query or WEKA fails
     */
    public String cluster(int qid, TestInfo[] testInfos) throws Exception {
        Properties properties = loadJdbcProperties();

        InstanceQuery query = new InstanceQuery();
        query.setUsername(properties.getProperty("jdbc.username"));
        query.setPassword(properties.getProperty("jdbc.password"));
        query.setQuery("SELECT code FROM tests WHERE qid = " + qid + ";");
        Instances data = query.retrieveInstances();

        int rows = data.numInstances();
        if (rows > 0 && (testInfos == null || testInfos.length < rows)) {
            throw new IllegalArgumentException(
                    "testInfos must cover all " + rows + " loaded submissions of question " + qid);
        }

        // Options first, setInputFormat last: WEKA requires it to be the final
        // call before the filter is applied.
        StringToWordVector filter = new StringToWordVector();
        filter.setWordsToKeep(1000);
        filter.setIDFTransform(true);
        filter.setOutputWordCounts(true);
        filter.setInputFormat(data);
        Instances dataFiltered = Filter.useFilter(data, filter);

        SimpleKMeans skm = new SimpleKMeans();
        skm.setDisplayStdDevs(false);
        skm.setDistanceFunction(new EuclideanDistance());
        skm.setMaxIterations(500);
        skm.setDontReplaceMissingValues(true);
        skm.setNumClusters(3);
        skm.setPreserveInstancesOrder(false);
        skm.setSeed(100);
        skm.buildClusterer(dataFiltered);

        // NOTE(review): the evaluation result is not used in the report below.
        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(skm);
        eval.evaluateClusterer(dataFiltered);

        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < dataFiltered.numInstances(); i++) {
            builder.append("用户ID:").append(testInfos[i].getUid())
                    .append(",提交时间:").append(testInfos[i].getSubmitTime())
                    .append(",在聚类编号 ").append(skm.clusterInstance(dataFiltered.instance(i)))
                    .append(" 中。\n");
        }
        return builder.toString();
    }

    /** Loads the JDBC credentials from the classpath, failing loudly if they are unavailable. */
    private static Properties loadJdbcProperties() throws IOException {
        Properties properties = new Properties();
        try (InputStream inputStream = MailUtil.class.getClassLoader().getResourceAsStream(CONFIG_RESOURCE)) {
            if (inputStream == null) {
                throw new IOException("configuration not found on classpath: " + CONFIG_RESOURCE);
            }
            properties.load(inputStream);
        }
        return properties;
    }
}