Json-Python-Server/app/services/analysis/modules/modeling.py
2026-01-29 18:18:32 +08:00

113 lines
4.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
def analyze_feature_importance(self):
    """Rank features by random-forest importance for predicting the first column.

    The first column of ``self.data`` is used as the regression target and
    every remaining column is used as a feature.

    Returns:
        A 3-tuple ``(img_path, summary, fi_df)``:

        * ``img_path`` -- path of the saved bar chart (top 10 features), or
          ``None`` when ``self.generate_plots`` is falsy or the analysis did
          not run.
        * ``summary`` -- status message describing the outcome.
        * ``fi_df`` -- DataFrame with ``feature`` and ``importance`` columns
          sorted by descending importance, or ``None`` when the analysis
          did not run or failed.

    Any exception raised during the analysis is logged through
    ``self._log_step`` and reported in ``summary`` instead of propagating.
    """
    try:
        self._log_step("Analyzing feature importance...")

        data = getattr(self, 'data', None)
        if data is None or len(data.columns) <= 1:
            self._log_step("Not enough data for feature importance analysis", "warning")
            return None, "Not enough data for feature importance analysis", None

        # Use the first column as the target variable. It must NOT also be
        # part of the feature matrix: a model that can read the target as an
        # input assigns it almost all of the importance, which makes the
        # ranking of the real features meaningless.
        y = data.iloc[:, 0]
        X = data.iloc[:, 1:]

        model = RandomForestRegressor(n_estimators=50, random_state=42)  # reduced tree count
        model.fit(X, y)

        feature_importance = pd.Series(model.feature_importances_, index=X.columns)
        feature_importance = feature_importance.sort_values(ascending=False)
        fi_df = feature_importance.reset_index()
        fi_df.columns = ['feature', 'importance']
        summary = f"Feature importance analysis completed, top feature: {fi_df.iloc[0]['feature']}"

        if not self.generate_plots:
            self._log_step("Feature importance analysis completed (data only)", "success")
            return None, summary, fi_df

        plt.figure(figsize=(8, 6))
        try:
            feature_importance.head(10).plot(kind='bar')
            plt.title('Feature Importance Analysis')
            plt.ylabel('Importance Score')
            plt.tight_layout()
            img_path = os.path.join(self.temp_dir.name, 'feature_importance.png')
            plt.savefig(img_path, dpi=150, bbox_inches='tight')
        finally:
            # Always release the figure, even when plotting or saving fails,
            # so repeated failures do not accumulate open figures.
            plt.close()

        self._log_step("Feature importance analysis completed", "success")
        return img_path, summary, fi_df
    except Exception as e:
        self._log_step(f"Feature importance analysis failed: {e}", "error")
        return None, f"Feature importance analysis failed: {e}", None
def perform_var_analysis(self, steps=10):
    """Fit a vector autoregression (VAR) and forecast future values.

    At most the first three numeric columns of ``self.data`` are modelled.
    The lag order is chosen by AIC with a maximum of 2 lags.

    Args:
        steps: Number of future periods to forecast. Defaults to 10.

    Returns:
        A 3-tuple ``(img_path, summary, forecast_df)``:

        * ``img_path`` -- path of the saved actual-vs-forecast chart, or
          ``None`` when ``self.generate_plots`` is falsy or the analysis did
          not run.
        * ``summary`` -- status message describing the outcome.
        * ``forecast_df`` -- DataFrame with one ``<column>_forecast`` column
          per modelled variable and ``steps`` rows, or ``None`` when the
          analysis did not run or failed.

    Any exception raised during the analysis is logged through
    ``self._log_step`` and reported in ``summary`` instead of propagating.
    """
    try:
        self._log_step("Performing VAR analysis...")

        data = getattr(self, 'data', None)
        if data is None or len(data.columns) <= 1:
            self._log_step("Not enough data for VAR analysis", "warning")
            return None, "数据不足无法进行VAR分析", None

        numeric_data = data.select_dtypes(include=[np.number])
        if len(numeric_data.columns) < 2:
            self._log_step("Not enough numeric columns for VAR analysis", "warning")
            return None, "数值变量不足无法进行VAR分析", None

        # Imported only once the input is known to be usable, so that
        # unusable input is reported as such rather than as an import error.
        from statsmodels.tsa.api import VAR

        var_data = numeric_data.iloc[:, : min(3, len(numeric_data.columns))]
        results = VAR(var_data).fit(maxlags=2, ic='aic')
        lag_order = results.k_ar
        # NOTE(review): if AIC selects 0 lags, ``[-lag_order:]`` slices the
        # whole array rather than an empty one -- confirm how statsmodels
        # behaves for k_ar == 0.
        forecast = results.forecast(var_data.values[-lag_order:], steps=steps)
        forecast_df = pd.DataFrame(
            data=forecast,
            columns=[f"{col}_forecast" for col in var_data.columns],
        )
        summary = f"VAR分析完成使用滞后阶数: {lag_order}生成了{steps}期预测"

        if not self.generate_plots:
            self._log_step("VAR analysis completed (data only)", "success")
            return None, summary, forecast_df

        n_obs = len(var_data)
        plt.figure(figsize=(12, 8))
        try:
            for i, col in enumerate(var_data.columns):
                # Positional access keeps this working with duplicate names.
                plt.plot(range(n_obs), var_data.iloc[:, i].values, label=f'{col} (actual)', alpha=0.7)
                plt.plot(
                    range(n_obs, n_obs + steps),
                    forecast[:, i],
                    label=f'{col} (forecast)',
                    linestyle='--',
                )
            plt.axvline(x=n_obs, color='red', linestyle=':', alpha=0.7, label='Forecast Start')
            plt.xlabel('Time')
            plt.ylabel('Value')
            plt.title('Vector Autoregression (VAR) Forecast')
            plt.legend()
            plt.grid(True, alpha=0.3)
            plt.tight_layout()
            img_path = os.path.join(self.temp_dir.name, 'var_analysis.png')
            plt.savefig(img_path, dpi=150, bbox_inches='tight')
        finally:
            # Always release the figure, even when plotting or saving fails,
            # so repeated failures do not accumulate open figures.
            plt.close()

        self._log_step("VAR analysis completed", "success")
        return img_path, summary, forecast_df
    except Exception as e:
        self._log_step(f"VAR analysis failed: {e}", "error")
        return None, f"VAR分析失败: {e}", None