PySpark logistic regression

from pyspark.ml.classification import LogisticRegression
from pyspark.ml.linalg import Vectors
from pyspark.sql import SparkSession
# Build (or reuse) the SparkSession used to create the DataFrames below.
# The chain must end with getOrCreate(): without it `spark` is only the
# Builder object and the later spark.createDataFrame(...) calls fail.
spark = (
    SparkSession.builder
    .appName("dataFrame")
    .getOrCreate()
)

# Training set: each (label, raw feature values) pair becomes a row with a
# dense feature vector, under the column names "label" and "features".
training = spark.createDataFrame(
    [
        (label, Vectors.dense(values))
        for label, values in (
            (1.0, [0.0, 1.1, 0.1]),
            (0.0, [2.0, 1.0, -1.0]),
            (0.0, [2.0, 1.3, 1.0]),
            (1.0, [0.0, 1.2, -0.5]),
        )
    ],
    schema=["label", "features"],
)

# Create a LogisticRegression instance. This instance is an Estimator.
# maxIter caps the number of optimizer iterations; regParam is the
# regularization parameter.
lr = LogisticRegression(maxIter=10, regParam=0.01)
# Print out the parameters, documentation, and any default values.
# print("LogisticRegression parameters:\n" + lr.explainParams() + "\n")

# Learn a LogisticRegression model. This uses the parameters stored in lr
# and fits them against the `training` DataFrame defined above.
model1 = lr.fit(training)

# Held-out rows used for scoring; same ("label", "features") layout as the
# training DataFrame.
test = spark.createDataFrame(
    [
        (expected, Vectors.dense(coords))
        for expected, coords in (
            (1.0, [-1.0, 1.5, 1.3]),
            (0.0, [3.0, 2.0, -0.1]),
            (1.0, [0.0, 2.2, -1.5]),
        )
    ],
    schema=["label", "features"],
)

# Score the test rows with the fitted model, then pull the columns that are
# printed back to the driver. collect() is needed so that `result` holds the
# rows themselves and the loop below can iterate over them.
prediction = model1.transform(test)
result = (
    prediction
    .select("features", "label", "probability", "prediction")
    .collect()
)

# One line per test row: the input features, the true label, the model's
# probability vector and the predicted label.
for row in result:
    print("features=%s, label=%s -> prob=%s, prediction=%s"
          % (row.features, row.label, row.probability, row.prediction))
features=[-1.0,1.5,1.3], label=1.0 -> prob=[0.0013759947069214283,0.9986240052930786], prediction=1.0
features=[3.0,2.0,-0.1], label=0.0 -> prob=[0.9816604009374171,0.018339599062582975], prediction=0.0
features=[0.0,2.2,-1.5], label=1.0 -> prob=[0.0016981475578358419,0.9983018524421641], prediction=1.0
posted @ 2019-03-12 12:51  luoganttcc  阅读(141)  评论(0编辑  收藏  举报