alink使用csv数据源
数据源-波士顿房价
过程:首先定义数据源batch
CsvSourceBatchOp
参数:
文件路径:String filePath(csv文件路径)
列名: String[] colNames
列数据类型:TypeInformation<?>[] colTypes
列分隔符:String fieldDelim
行分隔符:String rowDelim
然后可以定义一个sink算子(例如CsvSinkBatchOp)来保存数据
最后通过BatchOperator.execute()触发任务执行
/**
 * Fits a linear regression on a CSV data set and writes the transformed output to a CSV file.
 *
 * <p>The input {@code ./house_price2.csv} is read with 14 columns, each declared as
 * {@code Double}: 13 feature columns followed by the label column {@code "convert"}.
 * The fitted model is applied back to the same source, with the prediction column named
 * {@code "p"}, and the result is linked into a sink targeting {@code ./house_price3.csv}.
 *
 * @param args command-line arguments; not used
 * @throws Exception propagated from the operators or from {@code BatchOperator.execute()}
 */
public static void main(String[] args) throws Exception {
    final String labelCol = "convert";
    final String[] featureCols = {
        "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
        "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT"
    };

    // CSV schema: all feature columns first, then the label column as the last field.
    final String[] columnNames = new String[featureCols.length + 1];
    System.arraycopy(featureCols, 0, columnNames, 0, featureCols.length);
    columnNames[featureCols.length] = labelCol;

    // Every column is declared as Double.
    final TypeInformation<?>[] columnTypes = new TypeInformation<?>[columnNames.length];
    for (int idx = 0; idx < columnTypes.length; idx++) {
        columnTypes[idx] = TypeInformation.of(Double.class);
    }

    // Source: comma-separated fields, newline-separated rows.
    CsvSourceBatchOp source =
        new CsvSourceBatchOp("./house_price2.csv", columnNames, columnTypes, ",", "\n");

    LinearRegression regression = new LinearRegression();
    regression
        .setFeatureCols(featureCols)
        .setLabelCol(labelCol)
        .setPredictionCol("p");

    // Fit on the source and apply the fitted model to that same source.
    BatchOperator<?> predictions = regression.fit(source).transform(source);

    CsvSinkBatchOp sink = new CsvSinkBatchOp("./house_price3.csv");
    sink.linkFrom(predictions);

    // Operators above are only wired together; this call is what runs them.
    BatchOperator.execute();
}