113 lines
4.4 KiB
Python
113 lines
4.4 KiB
Python
|
|
import os
|
|||
|
|
|
|||
|
|
import numpy as np
|
|||
|
|
import pandas as pd
|
|||
|
|
import matplotlib.pyplot as plt
|
|||
|
|
from sklearn.ensemble import RandomForestRegressor
|
|||
|
|
|
|||
|
|
|
|||
|
|
def analyze_feature_importance(self):
    """Rank the columns of ``self.data`` by random-forest feature importance.

    The first column of ``self.data`` is used as the regression target and
    the remaining columns are used as features. A
    ``RandomForestRegressor`` is fitted and its ``feature_importances_``
    are sorted in descending order.

    Returns:
        A ``(img_path, summary, fi_df)`` tuple:

        * ``img_path`` -- path of the saved bar chart (top 10 features)
          inside ``self.temp_dir``, or ``None`` when
          ``self.generate_plots`` is falsy or the analysis did not run.
        * ``summary`` -- human-readable status message.
        * ``fi_df`` -- DataFrame with ``feature`` and ``importance``
          columns, or ``None`` when the analysis did not run.

    Any exception is logged through ``self._log_step`` and reported in
    ``summary`` instead of being propagated.
    """
    try:
        self._log_step("Analyzing feature importance...")

        data = getattr(self, 'data', None)
        if data is None or len(data.columns) <= 1:
            self._log_step("Not enough data for feature importance analysis", "warning")
            return None, "Not enough data for feature importance analysis", None

        # The first column is the target. It must NOT also appear among the
        # features: a model that can see the target as an input trivially
        # assigns it (almost) all of the importance.
        y = data.iloc[:, 0]
        X = data.iloc[:, 1:]

        # A small forest keeps the analysis fast; the seed makes it repeatable.
        model = RandomForestRegressor(n_estimators=50, random_state=42)
        model.fit(X, y)

        feature_importance = pd.Series(
            model.feature_importances_, index=X.columns
        ).sort_values(ascending=False)

        fi_df = feature_importance.reset_index()
        fi_df.columns = ['feature', 'importance']

        summary = f"Feature importance analysis completed, top feature: {fi_df.iloc[0]['feature']}"

        if not self.generate_plots:
            self._log_step("Feature importance analysis completed (data only)", "success")
            return None, summary, fi_df

        fig, ax = plt.subplots(figsize=(8, 6))
        try:
            feature_importance.head(10).plot(kind='bar', ax=ax)
            ax.set_title('Feature Importance Analysis')
            ax.set_ylabel('Importance Score')
            fig.tight_layout()

            img_path = os.path.join(self.temp_dir.name, 'feature_importance.png')
            fig.savefig(img_path, dpi=150, bbox_inches='tight')
        finally:
            # Always release the figure, even if plotting or saving fails.
            plt.close(fig)

        self._log_step("Feature importance analysis completed", "success")
        return img_path, summary, fi_df

    except Exception as e:
        self._log_step(f"Feature importance analysis failed: {e}", "error")
        return None, f"Feature importance analysis failed: {e}", None
|
|||
|
|
|
|||
|
|
|
|||
|
|
def perform_var_analysis(self):
    """Fit a vector autoregression (VAR) and forecast ten steps ahead.

    Uses at most the first three numeric columns of ``self.data``. The lag
    order is chosen by AIC with ``maxlags=2``; if that selection yields a
    lag order of 0, the model is refitted with one lag so that a forecast
    can be produced.

    Returns:
        A ``(img_path, summary, forecast_df)`` tuple:

        * ``img_path`` -- path of the saved actual-vs-forecast chart inside
          ``self.temp_dir``, or ``None`` when ``self.generate_plots`` is
          falsy or the analysis did not run.
        * ``summary`` -- human-readable status message.
        * ``forecast_df`` -- DataFrame with one ``<column>_forecast`` column
          per modelled variable and ten rows, or ``None`` when the analysis
          did not run.

    Any exception (including a missing ``statsmodels`` installation) is
    logged through ``self._log_step`` and reported in ``summary`` instead of
    being propagated.
    """
    forecast_steps = 10  # forecast horizon, also reported in the summary

    try:
        self._log_step("Performing VAR analysis...")

        data = getattr(self, 'data', None)
        if data is None or len(data.columns) <= 1:
            self._log_step("Not enough data for VAR analysis", "warning")
            return None, "数据不足,无法进行VAR分析", None

        # Imported lazily so the rest of the file works without statsmodels.
        from statsmodels.tsa.api import VAR

        numeric_data = data.select_dtypes(include=[np.number])
        if len(numeric_data.columns) < 2:
            self._log_step("Not enough numeric columns for VAR analysis", "warning")
            return None, "数值变量不足,无法进行VAR分析", None

        # Cap the system at three variables (slicing past the end is safe).
        var_data = numeric_data.iloc[:, :3]

        model = VAR(var_data)
        results = model.fit(maxlags=2, ic='aic')
        if results.k_ar == 0:
            # AIC may prefer the constant-only model (lag 0), which has no
            # lag coefficients to forecast from; fall back to one lag.
            results = model.fit(1)

        lag_order = results.k_ar
        forecast = results.forecast(var_data.values[-lag_order:], steps=forecast_steps)

        forecast_df = pd.DataFrame(
            data=forecast,
            columns=[f"{col}_forecast" for col in var_data.columns],
        )
        summary = f"VAR分析完成,使用滞后阶数: {lag_order},生成了{forecast_steps}期预测"

        if not self.generate_plots:
            self._log_step("VAR analysis completed (data only)", "success")
            return None, summary, forecast_df

        n_obs = len(var_data)
        history = var_data.values
        history_x = range(n_obs)
        future_x = range(n_obs, n_obs + forecast_steps)

        fig, ax = plt.subplots(figsize=(12, 8))
        try:
            for i, col in enumerate(var_data.columns):
                ax.plot(history_x, history[:, i], label=f'{col} (actual)', alpha=0.7)
                ax.plot(future_x, forecast[:, i], label=f'{col} (forecast)', linestyle='--')

            ax.axvline(x=n_obs, color='red', linestyle=':', alpha=0.7, label='Forecast Start')
            ax.set_xlabel('Time')
            ax.set_ylabel('Value')
            ax.set_title('Vector Autoregression (VAR) Forecast')
            ax.legend()
            ax.grid(True, alpha=0.3)
            fig.tight_layout()

            img_path = os.path.join(self.temp_dir.name, 'var_analysis.png')
            fig.savefig(img_path, dpi=150, bbox_inches='tight')
        finally:
            # Always release the figure, even if plotting or saving fails.
            plt.close(fig)

        self._log_step("VAR analysis completed", "success")
        return img_path, summary, forecast_df

    except Exception as e:
        self._log_step(f"VAR analysis failed: {e}", "error")
        return None, f"VAR分析失败: {e}", None
|