from time import time
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import mean_squared_error

# Load the comma-separated data file into a numeric NumPy array.
# The string below is the path of the data file (this is what the original
# note "the path goes inside the parentheses" referred to); point it at your
# own copy of the CSV before running.
DATA_PATH = 'C:/Users/Administrator.DESKTOP-BT0S13O/Desktop/数据1.csv'
data = np.loadtxt(DATA_PATH, delimiter=',')

# Train a random-forest regressor on the loaded table, report its fit on the
# train/test split, and rank the input columns by feature importance.

# Single bound used both for the per-split feature budget and for the
# "most important columns" cut below.
TOP_K = 100

# Column 0 is used as the regression target; all remaining columns are features.
RON = data[:, 0]
factors = data[:, 1:]

t0 = time()
forest = RandomForestRegressor(
    n_estimators=500,
    random_state=0,
    # Clamp to the number of available columns so the setting never exceeds
    # the feature count; with TOP_K or more columns this is exactly TOP_K.
    max_features=min(TOP_K, factors.shape[1]),
    n_jobs=2,
)

# Fixed random_state keeps the 80/20 split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    factors, RON, test_size=0.2, shuffle=True, random_state=0
)
forest.fit(X_train, y_train)
# Wall-clock time since t0: covers model construction, the split and training.
print("done in %0.3fs" % (time() - t0))

# Predictions are made on the training rows, so this is the training-set MSE.
y_pred = forest.predict(X_train)
mse = mean_squared_error(y_train, y_pred)
print(mse)
print("Traing Score:%f" % forest.score(X_train, y_train))
print("Testing Score:%f" % forest.score(X_test, y_test))

# One importance value per feature column, in column order.
importances = forest.feature_importances_
print('每个维度对应的重要性因子:\n', importances)
# Same values sorted from largest to smallest, raw and scaled by 100.
print(sorted(importances, reverse=True))
print(sorted(importances * 100, reverse=True))

# argsort is ascending; [::-1] reverses it so the most important column is first.
indices = np.argsort(importances)[::-1]
print('得到按维度重要性因子排序的维度的序号:\n', indices)

# Keep the indices of the TOP_K most important columns (fewer if the data has
# fewer columns, since slicing past the end is harmless).
most_import = indices[:TOP_K]
# NOTE(review): this prints the full training matrix followed by the index
# array. If the intent was to show only the selected columns, that would be
# X_train[:, most_import] -- confirm before changing.
print(X_train, most_import)

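# NOTE: the lines below are web-page residue that was copied along with the
# script; they are not code and are kept only as comments.
# Logo
#
# 华为开发者空间,是为全球开发者打造的专属开发空间,汇聚了华为优质开发资源及工具,致力于让每一位开发者拥有一台云主机,基于华为根生态开发、创新。
#
# 更多推荐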