Java生成Zipf分布的数据集(可自定义倾斜度,用于Spark数据倾斜(data skew)测试)
1.代码
import java.io.Serializable;
import java.util.NavigableMap;
import java.util.Random;
import java.util.TreeMap;

/**
 * Builds a Zipf-distributed lookup table over {@code nums} ranks and samples ranks from it.
 *
 * <p>Rank {@code i} (1-based) gets the weight {@code 1 / i^skewness}; the weights are normalised
 * so that they sum to one, and the running total after each rank is stored as a key of
 * {@link #map}. A skewness of 0 gives every rank the same weight; larger values concentrate
 * the weight on the first ranks.
 */
public class Zifp_gen implements Serializable {
    /** Source of uniform draws; seeded with 0 so every instance yields the same sequence. */
    private Random random = new Random(0);

    /**
     * Cumulative probability (upper bound of a rank's interval) mapped to the zero-based rank
     * that owns the interval. Package-private because callers in this package iterate it directly.
     */
    NavigableMap<Double, Integer> map;

    /** Numerator of the unnormalised weight {@code Constant / rank^skew}. */
    private static final double Constant = 1.0;

    /**
     * Creates the cumulative table.
     *
     * @param nums     number of ranks; a value of 0 or less produces an empty table
     * @param skewness exponent of the distribution; 0 means no skew, larger means more skew
     */
    public Zifp_gen(int nums, double skewness) {
        map = computeMap(nums, skewness);
    }

    /**
     * Computes the cumulative-probability table.
     *
     * @param size number of ranks
     * @param skew exponent applied to each rank
     * @return map from cumulative probability to zero-based rank, ordered by probability
     */
    private static NavigableMap<Double, Integer> computeMap(int size, double skew) {
        NavigableMap<Double, Integer> table = new TreeMap<Double, Integer>();

        // Normalising divisor: the sum of all unnormalised weights.
        double total = 0;
        for (int rank = 1; rank <= size; rank++) {
            total += weight(rank, skew);
        }

        // Running total of normalised weights; early ranks cover much wider intervals than late ones.
        double cumulative = 0;
        for (int rank = 1; rank <= size; rank++) {
            cumulative += weight(rank, skew) / total;
            // When a rank's share is too small to change the running total, its interval has
            // zero width. Keep the first rank that reached this value; overwriting would hand
            // the whole interval to the least likely rank.
            if (!table.containsKey(cumulative)) {
                table.put(cumulative, rank - 1);
            }
        }
        return table;
    }

    /** Returns the unnormalised weight of the given 1-based rank. */
    private static double weight(int rank, double skew) {
        return Constant / Math.pow(rank, skew);
    }

    /**
     * Draws one rank according to the table.
     *
     * @return a rank in {@code [1, nums]}
     * @throws IllegalStateException if the table is empty (the instance was built with no ranks)
     */
    public int next() {
        if (map.isEmpty()) {
            throw new IllegalStateException("cannot sample: the distribution has no ranks");
        }
        double value = random.nextDouble();
        // The smallest cumulative bound that is >= value identifies the owning rank.
        NavigableMap.Entry<Double, Integer> hit = map.ceilingEntry(value);
        if (hit == null) {
            // Rounding can leave the final bound slightly below the drawn value;
            // such a draw belongs to the last rank.
            hit = map.lastEntry();
        }
        return hit.getValue() + 1;
    }
}
2.test
import java.util.NavigableMap;
/** Prints every entry of a 100-rank table built with skew 1.0 to standard output. */
public class Test {

    /** Renders one table entry as the line that is printed for it. */
    private static String describe(NavigableMap.Entry<Double, Integer> entry) {
        Double bound = entry.getKey();
        Integer rank = entry.getValue();
        return "Key = " + bound + ", Value = " + rank;
    }

    public static void main(String[] args) {
        Zifp_gen generator = new Zifp_gen(100, 1.0);
        // Entries come out in ascending key order because the table is a NavigableMap.
        for (NavigableMap.Entry<Double, Integer> entry : generator.map.entrySet()) {
            System.out.println(describe(entry));
        }
    }
}
原文:Zipf(齐夫)分布及Java实现
zipf数据写入外部文本
public class Test { public static void main(String args[]) throws IOException{ Zifp_gen z1=new Zifp_gen(100,0.5); PrintWriter pw=new PrintWriter(new FileWriter("F:\\zipf_100_0.5.txt")); for (NavigableMap.Entry<Double, Integer> entry : z1.map.entrySet()) { // System.out.println("Key = " + entry.getKey() + ", Value = " + entry.getValue()); // String str="Key = " + entry.getKey() + ", Value = " + entry.getValue(); String str= entry.getKey() + " " ; pw.println(str); } pw.close(); } }
posted on 2018-11-19 11:40 moonlight.ml 阅读(825) 评论(0) 编辑 收藏 举报