java 数据流相关

直接代码吧,很清楚:

package mahout;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.IOUtils;

public class GetLibData {
	//将数据集进行分割,单个文件太大了。
	public static void main(String[] args) throws Exception {
		File src = new File("D:\\hadoop相关\\数据集\\links-simple-sorted\\links-simple-sorted.txt");
		FileInputStream fis = new FileInputStream(src);
		BufferedReader reader = new BufferedReader(new InputStreamReader(fis));
		BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File("data/wike-part100000"))));
		String line = null;
		int count = 0;
		while((line = reader.readLine()) != null && count < 100001){
			writer.write(line);
			writer.write("\n");
			count++;
		}
		writer.close();
		fis.close();
		reader.close();
	}
	//下载数据集从网上
	private static void test1() throws MalformedURLException, IOException,
			FileNotFoundException {
		URL libUrl = new URL("http://www.occamslab.com/petricek/data/ratings.dat");
		
		InputStream in = libUrl.openStream();
		
		FileOutputStream fos = new FileOutputStream(new File("data/test.dat"));
		
		IOUtils.copyBytes(in, fos, 4096);;
		in.close();
		fos.close();
	}
}

 当然数据集可以自己制造。

posted @ 2014-08-05 15:01  jseven  阅读(101)  评论(0编辑  收藏  举报