import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
# Load the training set and the test set from the Kaggle input directory.
train_data = pd.read_csv('/kaggle/input/titanic/train.csv')
test_data = pd.read_csv('/kaggle/input/titanic/test.csv')

# Columns used as model inputs; non-numeric ones are one-hot encoded below.
features = ['Pclass', 'Sex', 'SibSp', 'Parch']

X = pd.get_dummies(train_data[features])  # training feature matrix
y = train_data['Survived']  # training labels

# random_state is fixed so that repeated runs produce the same forest.
model = RandomForestClassifier(
    n_estimators=100,
    max_depth=5,
    random_state=1,
)
model.fit(X, y)

# One-hot encode the test set, then align it to the training columns.
# get_dummies derives its columns from the values present in each frame, so
# encoding the two frames separately does not guarantee the same column set
# or order. Reindexing adds any missing dummy column (filled with 0), drops
# columns the model never saw, and puts everything in the fitted order.
X_test = pd.get_dummies(test_data[features]).reindex(
    columns=X.columns,
    fill_value=0,
)
predictions = model.predict(X_test)

# Write the submission file: one row per test passenger with its prediction.
output = pd.DataFrame({
    'PassengerId': test_data.PassengerId,
    'Survived': predictions,
})
output.to_csv('my_submission.csv', index=False)
print('Your submission was successfully saved!')