PySpark 生存回归


from pyspark.ml.regression import AFTSurvivalRegression
from pyspark.ml.linalg import Vectors

from pyspark.sql import SparkSession

# Obtain a SparkSession named "dataFrame" (getOrCreate reuses an existing one).
spark = (
    SparkSession.builder
    .appName("dataFrame")
    .getOrCreate()
)

# Five sample rows; each tuple lines up with the column names given below.
rows = [
    (1.218, 1.0, Vectors.dense(1.560, -0.605)),
    (2.949, 0.0, Vectors.dense(0.346, 2.158)),
    (3.627, 0.0, Vectors.dense(1.380, 0.231)),
    (0.273, 1.0, Vectors.dense(0.520, 1.151)),
    (4.199, 0.0, Vectors.dense(0.795, -0.226)),
]
columns = ["label", "censor", "features"]
training = spark.createDataFrame(rows, columns)

# Ask the estimator to also emit quantiles at these probabilities,
# written to an extra output column called "quantiles".
quantileProbabilities = [0.3, 0.6]
aft = AFTSurvivalRegression(
    quantileProbabilities=quantileProbabilities,
    quantilesCol="quantiles",
)

model = aft.fit(training)

# Report the fitted coefficients, intercept and scale parameter.
# `!s` forces str() conversion, matching plain string concatenation.
print(f"Coefficients: {model.coefficients!s}")
print(f"Intercept: {model.intercept!s}")
print(f"Scale: {model.scale!s}")

# Apply the fitted model to the training rows and print every column in full.
predictions = model.transform(training)
predictions.show(truncate=False)
Coefficients: [-0.4963044110531165,0.19845217252922842]
Intercept: 2.638089896305634
Scale: 1.5472363533632303
+-----+------+--------------+-----------------+---------------------------------------+
|label|censor|features      |prediction       |quantiles                              |
+-----+------+--------------+-----------------+---------------------------------------+
|1.218|1.0   |[1.56,-0.605] |5.718985621018952|[1.1603229908059516,4.995460583406753] |
|2.949|0.0   |[0.346,2.158] |18.07678210850554|[3.6675919944963185,15.789837303662035]|
|3.627|0.0   |[1.38,0.231]  |7.381908879359964|[1.4977129086101577,6.448002719505493] |
|0.273|1.0   |[0.52,1.151]  |13.57771781488451|[2.754778414791513,11.859962351993202] |
|4.199|0.0   |[0.795,-0.226]|9.013087597344812|[1.828662187733188,7.8728164067854856] |
+-----+------+--------------+-----------------+---------------------------------------+
posted @ 2022-08-19 22:58  luoganttcc  阅读(2)  评论(0)  编辑  收藏  举报