python随机森林代码

from time import time; from sklearn.ensemble import RandomForestRegressor; from sklearn.model_selection import train_test_split; import numpy as np; from sklearn.metrics import mean_squared_error; data = np.loadtxt(...)

boy918.

2743人浏览 · 2021-10-19 23:59:08

boy918. · 2021-10-19 23:59:08 发布

from time import time
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import mean_squared_error

# Load the data set from a comma-separated file. The argument is the path to
# the data file; change it to wherever the CSV lives on your machine.
data = np.loadtxt('C:/Users/Administrator.DESKTOP-BT0S13O/Desktop/数据1.csv', delimiter=',')

# Column 0 is used as the regression target; every other column is a feature.
RON = data[:, 0]
factors = data[:, 1:]

# Consider at most 100 candidate features per split, capped at the number of
# feature columns so the setting stays valid for narrower data sets.
forest = RandomForestRegressor(
    n_estimators=500,
    random_state=0,
    max_features=min(100, factors.shape[1]),
    n_jobs=2,
)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    factors, RON, test_size=0.2, shuffle=True, random_state=0
)

# Time the training step only.
t0 = time()
forest.fit(X_train, y_train)
print("done in %0.3fs" % (time() - t0))

# Mean squared error on the training rows (not the held-out rows).
y_pred = forest.predict(X_train)
mse = mean_squared_error(y_train, y_pred)
print(mse)

# score() is called on the training rows and then on the held-out rows.
print("Traing Score:%f" % forest.score(X_train, y_train))
print("Testing Score:%f" % forest.score(X_test, y_test))

# Importance value of every feature, in the same order as the feature columns.
importances = forest.feature_importances_
print('每个维度对应的重要性因子：\n', importances)

# The importance values from largest to smallest, first raw and then scaled
# by 100.
print(sorted(importances, reverse=True))
print(sorted(importances * 100, reverse=True))

# Feature indices ordered from most to least important: argsort sorts
# ascending, and [::-1] reverses the result.
indices = np.argsort(importances)[::-1]
print('得到按维度重要性因子排序的维度的序号：\n', indices)

# Keep the indices of the 100 most important features (fewer if the data has
# fewer than 100 feature columns, since slicing simply stops at the end).
most_import = indices[:100]
# NOTE(review): this prints the full training matrix followed by the selected
# indices. If the intent was to show only the selected columns, that would be
# X_train[:, most_import] -- confirm before changing.
print(X_train, most_import)