Java抓取Codeforces——针对某一次提交的源码和数据

需要引入Jsoup依赖:

		<dependency>
		  <!-- jsoup HTML parser library @ https://jsoup.org/ -->
		  <groupId>org.jsoup</groupId>
		  <artifactId>jsoup</artifactId>
		  <version>1.12.1</version>
		</dependency>

Jsoup相关依赖:https://jsoup.org/
其他需要的是 FileHelper 类,参见:https://www.cnblogs.com/zifeiy/p/9224569.html
(不过我们这边可能需要将"UTF-8"改成"GBK" ~)
然后实现的类如下:

package com.zifeiy.cf_data_get.handle;

import java.io.File;
import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import com.zifeiy.cf_data_get.assist.FileHelper;

/**
 * Fetches a single Codeforces submission page and stores the submitted source
 * code together with the test data shown on that page.
 *
 * <p>Files are written below {@code D:/codeforces/<submissionId>}: the source as
 * {@code std.cpp} and every usable test as {@code data/<n>.in} and
 * {@code data/<n>.out}, numbered consecutively from 1.
 */
public class SubmissionInfoGet {
	
	/**
	 * Trailing marker of a test text that is skipped.
	 * NOTE(review): presumably the page appends this to texts it truncated — confirm.
	 */
	private static final String TRUNCATION_MARKER = "...";
	
	/**
	 * Downloads the submission page and writes the source code and test data to disk.
	 *
	 * @param contestId    id of the contest the submission belongs to
	 * @param submissionId id of the submission to download
	 * @throws IOException if the page cannot be fetched, the output directory cannot
	 *                     be created, the page does not contain the submitted source,
	 *                     or the numbers of input and answer blocks differ
	 */
	public void getSubmission(int contestId, int submissionId) throws IOException {
		String urlString = "http://codeforces.com/contest/" + contestId + "/submission/" + submissionId;
		Document doc = Jsoup.connect(urlString).get();
		System.out.println("title = " + doc.title());
		
		String dirString = "D:/codeforces/" + submissionId;
		File dataDir = new File(dirString + File.separator + "data");
		// mkdirs() also creates the parent submission directory; a failure is
		// reported here instead of surfacing later as an opaque write error.
		if (!dataDir.isDirectory() && !dataDir.mkdirs()) {
			throw new IOException("Cannot create directory: " + dataDir);
		}
		
		// Source code
		String codeString = firstText(doc.select("#program-source-text"));
		if (codeString == null) {
			throw new IOException("No element with id 'program-source-text' found at " + urlString);
		}
		File codeFile = new File(dirString + File.separator + "std.cpp");
		FileHelper.writeFile(codeFile, codeString);
		
		// Test data: the i-th input block is paired with the i-th answer block.
		Elements inputElements = doc.getElementsByClass("file input-view");
		Elements outputElements = doc.getElementsByClass("file answer-view");
		if (inputElements.size() != outputElements.size()) {
			// Pairing by index would mix up tests, so fail before writing any data file.
			throw new IOException("Found " + inputElements.size() + " input blocks but "
					+ outputElements.size() + " answer blocks at " + urlString);
		}
		
		int cnt = 0;
		for (int i = 0; i < inputElements.size(); i++) {
			String inputTextString = firstText(inputElements.get(i).getElementsByClass("text"));
			String outputTextString = firstText(outputElements.get(i).getElementsByClass("text"));
			// Skip a pair when either side is missing or ends with the marker:
			// such a pair would not be a usable test case.
			if (inputTextString == null || outputTextString == null
					|| inputTextString.endsWith(TRUNCATION_MARKER)
					|| outputTextString.endsWith(TRUNCATION_MARKER)) {
				continue;
			}
			cnt++;
			File inputFile = new File(dirString + File.separator + "data" + File.separator + cnt + ".in");
			FileHelper.writeFile(inputFile, inputTextString);
			File outputFile = new File(dirString + File.separator + "data" + File.separator + cnt + ".out");
			FileHelper.writeFile(outputFile, outputTextString);
		}
	}
	
	/**
	 * Returns the text of the first element in {@code elements}, or {@code null}
	 * when the selection is empty.
	 */
	private static String firstText(Elements elements) {
		return elements.isEmpty() ? null : elements.first().text();
	}
	
	
	// main for test
	public static void main(String[] args) throws IOException {
		new SubmissionInfoGet().getSubmission(1169, 54847813);
	}
	
}

其中,contestId 和 submissionId 分别对应 比赛Id 和 提交Id 。

posted @ 2019-06-02 15:02  zifeiy  阅读(505)  评论(0)  编辑  收藏  举报