import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor


def analyze_feature_importance(self):
    """Rank feature columns by random-forest importance.

    The first column of ``self.data`` is used as the regression target and
    every remaining column as a feature. A ``RandomForestRegressor`` with
    50 trees and a fixed seed is fitted, and its ``feature_importances_``
    are sorted in descending order. When ``self.generate_plots`` is truthy,
    a bar chart of the ten most important features is written to
    ``feature_importance.png`` inside ``self.temp_dir.name``.

    Returns:
        A ``(img_path, summary, fi_df)`` tuple.

        * ``img_path`` -- path of the saved chart, or ``None`` when plotting
          is disabled or the analysis did not complete.
        * ``summary`` -- status message describing the outcome.
        * ``fi_df`` -- DataFrame with columns ``feature`` and ``importance``
          sorted by importance (descending), or ``None`` when the analysis
          did not complete.

    Any exception is logged through ``self._log_step`` and reported in the
    returned summary instead of being raised.
    """
    try:
        self._log_step("Analyzing feature importance...")

        data = getattr(self, 'data', None)
        if data is None or len(data.columns) < 2:
            self._log_step("Not enough data for feature importance analysis", "warning")
            return None, "Not enough data for feature importance analysis", None

        # The first column is the target. It must be excluded from the
        # feature matrix: leaving it in lets the model predict y from y, so
        # the target would always come out as the "most important feature".
        y = data.iloc[:, 0]
        X = data.iloc[:, 1:]

        # A small forest keeps the fit fast; the fixed seed keeps the
        # ranking reproducible between runs.
        model = RandomForestRegressor(n_estimators=50, random_state=42)
        model.fit(X, y)

        feature_importance = pd.Series(model.feature_importances_, index=X.columns)
        feature_importance = feature_importance.sort_values(ascending=False)

        fi_df = feature_importance.reset_index()
        fi_df.columns = ['feature', 'importance']

        summary = f"Feature importance analysis completed, top feature: {fi_df.iloc[0]['feature']}"

        if not self.generate_plots:
            self._log_step("Feature importance analysis completed (data only)", "success")
            return None, summary, fi_df

        fig, ax = plt.subplots(figsize=(8, 6))
        try:
            feature_importance.head(10).plot(kind='bar', ax=ax)
            ax.set_title('Feature Importance Analysis')
            ax.set_ylabel('Importance Score')
            fig.tight_layout()

            img_path = os.path.join(self.temp_dir.name, 'feature_importance.png')
            fig.savefig(img_path, dpi=150, bbox_inches='tight')
        finally:
            # Close the figure even when plotting or saving fails, so that
            # repeated failures do not accumulate open figures.
            plt.close(fig)

        self._log_step("Feature importance analysis completed", "success")
        return img_path, summary, fi_df
    except Exception as e:
        self._log_step(f"Feature importance analysis failed: {e}", "error")
        return None, f"Feature importance analysis failed: {e}", None


def perform_var_analysis(self):
    """Fit a vector autoregression and forecast ten steps ahead.

    Up to the first three numeric columns of ``self.data`` are modelled
    jointly with statsmodels' ``VAR``. The lag order is chosen by AIC with
    a maximum of two lags. When ``self.generate_plots`` is truthy, the
    observed series and their forecasts are drawn and written to
    ``var_analysis.png`` inside ``self.temp_dir.name``.

    Returns:
        A ``(img_path, summary, forecast_df)`` tuple.

        * ``img_path`` -- path of the saved chart, or ``None`` when plotting
          is disabled or the analysis did not complete.
        * ``summary`` -- status message describing the outcome.
        * ``forecast_df`` -- DataFrame with one ``<column>_forecast`` column
          per modelled series and one row per forecast step, or ``None``
          when the analysis did not complete.

    Any exception (including a missing statsmodels installation) is logged
    through ``self._log_step`` and reported in the returned summary instead
    of being raised.
    """
    try:
        self._log_step("Performing VAR analysis...")

        data = getattr(self, 'data', None)
        if data is None or len(data.columns) < 2:
            self._log_step("Not enough data for VAR analysis", "warning")
            return None, "数据不足,无法进行VAR分析", None

        # Imported lazily so a missing statsmodels only disables this
        # analysis (the ImportError is handled by the except below).
        from statsmodels.tsa.api import VAR

        numeric_data = data.select_dtypes(include=[np.number])
        if len(numeric_data.columns) < 2:
            self._log_step("Not enough numeric columns for VAR analysis", "warning")
            return None, "数值变量不足,无法进行VAR分析", None

        # Cap the system at three series to keep the model small.
        var_data = numeric_data.iloc[:, : min(3, len(numeric_data.columns))]
        horizon = 10

        model = VAR(var_data)
        results = model.fit(maxlags=2, ic='aic')
        if results.k_ar == 0:
            # AIC can select zero lags. Such a fit has no lag coefficient
            # matrices to forecast from, and ``values[-0:]`` would hand the
            # whole sample to ``forecast``. Fall back to a VAR(1).
            results = model.fit(maxlags=1)
        lag_order = results.k_ar

        # The forecast is seeded with the last ``lag_order`` observations.
        forecast = results.forecast(var_data.values[-lag_order:], steps=horizon)
        forecast_df = pd.DataFrame(
            data=forecast,
            columns=[f"{col}_forecast" for col in var_data.columns],
        )

        summary = f"VAR分析完成,使用滞后阶数: {lag_order},生成了{horizon}期预测"

        if not self.generate_plots:
            self._log_step("VAR analysis completed (data only)", "success")
            return None, summary, forecast_df

        n_obs = len(var_data)
        fig, ax = plt.subplots(figsize=(12, 8))
        try:
            for i, col in enumerate(var_data.columns):
                ax.plot(range(n_obs), var_data[col].values, label=f'{col} (actual)', alpha=0.7)
                ax.plot(
                    range(n_obs, n_obs + horizon),
                    forecast[:, i],
                    label=f'{col} (forecast)',
                    linestyle='--',
                )

            # Vertical marker where the observed data ends.
            ax.axvline(x=n_obs, color='red', linestyle=':', alpha=0.7, label='Forecast Start')
            ax.set_xlabel('Time')
            ax.set_ylabel('Value')
            ax.set_title('Vector Autoregression (VAR) Forecast')
            ax.legend()
            ax.grid(True, alpha=0.3)
            fig.tight_layout()

            img_path = os.path.join(self.temp_dir.name, 'var_analysis.png')
            fig.savefig(img_path, dpi=150, bbox_inches='tight')
        finally:
            # Close the figure even when plotting or saving fails.
            plt.close(fig)

        self._log_step("VAR analysis completed", "success")
        return img_path, summary, forecast_df
    except Exception as e:
        self._log_step(f"VAR analysis failed: {e}", "error")
        return None, f"VAR分析失败: {e}", None