1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn .tree import DecisionTreeRegressor
4
+ from sklearn .ensemble import RandomForestRegressor
5
+ import xgboost as xgb
6
+ import lightgbm as lgb
7
+ from sklearn .metrics import r2_score
8
+ from sklearn .model_selection import train_test_split
9
+ import matplotlib .pyplot as plt
10
+
11
# Load the dataset.
file_path = 'Minlost.csv'  # make sure this path points at the data file
df = pd.read_csv(file_path)

# Result store: one list per model; the training loop appends
# (PortID, R²) tuples to these lists.
r2_scores = {
    model_name: []
    for model_name in ("DecisionTree", "RandomForest", "XGBoost", "LightGBM")
}
17
+
18
# Train and evaluate every model separately for each port (station).
#
# Each factory returns a fresh, unfitted regressor so that no state leaks
# from one port to the next. The keys match the keys of ``r2_scores``.
model_factories = {
    "DecisionTree": lambda: DecisionTreeRegressor(random_state=42),
    "RandomForest": lambda: RandomForestRegressor(n_estimators=100, random_state=42),
    "XGBoost": lambda: xgb.XGBRegressor(
        objective='reg:squarederror', n_estimators=100, random_state=42
    ),
    "LightGBM": lambda: lgb.LGBMRegressor(n_estimators=100, random_state=42),
}

grouped = df.groupby("PortID")
for port_id, group in grouped:
    print(f"Training for PortID {port_id} ...")

    # Prepare the data: a single feature column and the target column.
    X = group[['Available bikes']].values
    y = group['機会損失'].values

    # A single-row group cannot be split into train and test parts at all
    # (train_test_split raises ValueError), so skip it up front.
    if len(group) < 2:
        print(f"Skipping PortID {port_id}: only {len(group)} row(s), cannot split")
        continue

    # Hold out 20% of the rows for evaluation.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # R² is undefined on fewer than two test samples (scikit-learn returns
    # NaN with a warning), so such ports carry no usable score.
    if len(y_test) < 2:
        print(
            f"Skipping PortID {port_id}: only {len(y_test)} test sample(s), "
            "R² is undefined"
        )
        continue

    # Fit and score every model on the same split.
    port_scores = {}
    for model_name, make_model in model_factories.items():
        model = make_model()
        model.fit(X_train, y_train)
        port_scores[model_name] = r2_score(y_test, model.predict(X_test))

    # Record the scores only after all models succeeded, so the per-model
    # lists always contain the same ports in the same order.
    for model_name, score in port_scores.items():
        r2_scores[model_name].append((port_id, score))

    for model_name, score in port_scores.items():
        print(f"PortID {port_id} R² ({model_name}): {score:.2f} ")
62
+
63
# Convert the collected R² results into one DataFrame per model, each with
# the columns "PortID" and "R2".
dt_scores_df, rf_scores_df, xgb_scores_df, lgb_scores_df = (
    pd.DataFrame(r2_scores[model_name], columns=["PortID", "R2"])
    for model_name in ("DecisionTree", "RandomForest", "XGBoost", "LightGBM")
)
68
+
69
# Visualise the R² scores as a grouped bar chart: one group per port,
# one bar per model.
plt.figure(figsize=(15, 10))
models = ["DecisionTree", "RandomForest", "XGBoost", "LightGBM"]
colors = ["skyblue", "orange", "green", "purple"]
bar_width = 0.2

for i, (model, color) in enumerate(zip(models, colors)):
    scores_df = pd.DataFrame(r2_scores[model], columns=["PortID", "R2"])
    plt.bar(
        # Shift each model's bars sideways so the bars of one port do not overlap.
        [x + i * bar_width for x in range(len(scores_df))],
        scores_df["R2"],
        width=bar_width,
        label=model,
        color=color,
        align="center",
    )

# Label every bar group with its actual PortID. Without explicit ticks the
# axis would show bar indices even though it is titled "PortID". The labels
# are taken from the first model's results.
# NOTE(review): this assumes every model's list holds the same ports in the
# same order, as the training loop appends them — confirm if that loop changes.
port_labels = [port_id for port_id, _ in r2_scores[models[0]]]
group_offset = bar_width * (len(models) - 1) / 2  # centre of each bar group
plt.xticks(
    [x + group_offset for x in range(len(port_labels))],
    port_labels,
    rotation=90,
)

plt.xlabel("PortID")
plt.ylabel("R² Score")
plt.title("R² Scores for Each PortID by Model")
plt.legend()
plt.tight_layout()
plt.show()
+ plt .show ()
0 commit comments