192 lines
7.9 KiB
Python
192 lines
7.9 KiB
Python
"""v2 analysis route: analyze CSV from OSS/URL.
|
||
|
||
Design goals:
|
||
- Keep v1 endpoints unchanged
|
||
- Provide the same response shape as v1, but with URL as input
|
||
- Avoid leaking server local paths
|
||
"""
|
||
|
||
import gc
import logging
import os
import shutil
from datetime import datetime
from typing import Any, Dict, Optional
from urllib.parse import urlsplit

import psutil
from fastapi import APIRouter, BackgroundTasks, HTTPException, status
from pydantic import BaseModel, Field

from app.core.config import settings
from app.services.analysis import TimeSeriesAnalysisSystem
from app.services.oss_csv_source import UrlValidationError, download_csv_to_tempfile
|
||
|
||
# Module-level logger, named after this module so log output is filterable.
logger = logging.getLogger(__name__)

# Router registered by the application under the v2 API prefix.
router = APIRouter()
|
||
|
||
|
||
class AnalysisV2Request(BaseModel):
    """v2 analysis request: the input is an OSS/URL instead of an uploaded file."""

    # Publicly reachable (or presigned) URL of the CSV file to analyze.
    oss_url: str
    # Free-text description of the analysis task, forwarded to the analyzer.
    task_description: str = "时间序列数据分析"
    # Optional domain/background information about the data. default_factory
    # gives each instance its own dict rather than a shared class-level literal.
    data_background: Dict[str, Any] = Field(default_factory=dict)
    # Report language; "zh" and "en" are supported, anything else falls back to "zh".
    language: str = "zh"
    # Requested plot generation; the v2 endpoint always forces this off server-side.
    generate_plots: bool = False
    # Optional display name overriding the filename derived from the URL.
    source_name: Optional[str] = None
|
||
|
||
|
||
@router.get("/available_methods", summary="获取可用的分析方法(v2)")
|
||
async def get_available_methods_v2() -> dict:
|
||
"""v2 版本:返回与 v1 相同的可用分析方法列表。"""
|
||
|
||
return {
|
||
"success": True,
|
||
"methods": {
|
||
"statistical_overview": {"name": "统计概览", "description": "生成数据的基本统计信息和分布图表"},
|
||
"time_series_analysis": {"name": "时间序列分析", "description": "分析变量随时间变化的趋势和模式"},
|
||
"acf_pacf_analysis": {"name": "自相关分析", "description": "生成自相关和偏自相关函数图"},
|
||
"stationarity_tests": {"name": "平稳性检验", "description": "执行ADF、KPSS等平稳性检验"},
|
||
"normality_tests": {"name": "正态性检验", "description": "执行Shapiro-Wilk、Jarque-Bera正态性检验"},
|
||
"seasonal_decomposition": {"name": "季节性分解", "description": "分解时间序列的趋势、季节和残差成分"},
|
||
"spectral_analysis": {"name": "频谱分析", "description": "分析时间序列的频域特征"},
|
||
"correlation_analysis": {"name": "相关性分析", "description": "计算变量间的相关性并生成热力图"},
|
||
"pca_scree_plot": {"name": "PCA碎石图", "description": "显示主成分分析的解释方差"},
|
||
"pca_analysis": {"name": "主成分分析", "description": "降维分析,识别数据的主要变化方向"},
|
||
"feature_importance": {"name": "特征重要性", "description": "分析各变量对目标预测的重要性"},
|
||
"clustering_analysis": {"name": "聚类分析", "description": "将数据点分组为具有相似特征的簇"},
|
||
"factor_analysis": {"name": "因子分析", "description": "识别潜在的因子结构"},
|
||
"cointegration_test": {"name": "协整检验", "description": "检验时间序列变量间的长期均衡关系"},
|
||
"var_analysis": {"name": "向量自回归", "description": "多变量时间序列建模和预测"},
|
||
},
|
||
}
|
||
|
||
|
||
def check_memory():
    """Log the current process RSS and trigger a GC pass above the configured cap."""
    rss_mb = psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024
    logger.info(f"当前内存使用: {rss_mb:.2f} MB")

    # Above the configured ceiling, force a collection to release what we can.
    if rss_mb > settings.MAX_MEMORY_MB:
        logger.warning(f"内存使用超过阈值 ({settings.MAX_MEMORY_MB} MB),执行垃圾回收")
        gc.collect()
|
||
|
||
|
||
@router.post("/analyze", summary="执行完整分析(v2:从 OSS URL 读取 CSV)")
|
||
async def analyze_data_v2(request: AnalysisV2Request, background_tasks: BackgroundTasks) -> dict:
|
||
"""Analyze CSV from an OSS/URL, returning the same structure as v1."""
|
||
|
||
downloaded = None
|
||
|
||
try:
|
||
logger.info("=" * 60)
|
||
logger.info("开始分析 (v2)")
|
||
logger.info(f"URL host: {request.oss_url}")
|
||
logger.info(f"任务: {request.task_description}")
|
||
logger.info(f"语言: {request.language}")
|
||
logger.info("=" * 60)
|
||
|
||
check_memory()
|
||
|
||
# 语言处理:支持 zh/en,其他值回退为 zh
|
||
lang_key = request.language if request.language in {"zh", "en"} else "zh"
|
||
|
||
# charts 模式下强制不生成图片,即使请求传了 generate_plots=true
|
||
generate_plots = False
|
||
if request.generate_plots:
|
||
logger.info("generate_plots requested true, forcing false to skip image generation")
|
||
|
||
# 下载到临时文件
|
||
try:
|
||
downloaded = download_csv_to_tempfile(request.oss_url, suffix=".csv")
|
||
except UrlValidationError as e:
|
||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))
|
||
|
||
filename_for_meta = request.source_name or downloaded.source_name
|
||
|
||
# 创建分析器实例(复用原有分析系统)
|
||
analyzer = TimeSeriesAnalysisSystem(
|
||
downloaded.local_path,
|
||
request.task_description,
|
||
data_background=request.data_background,
|
||
language=lang_key,
|
||
generate_plots=generate_plots,
|
||
)
|
||
|
||
# 运行分析
|
||
logger.info("执行分析...")
|
||
results, log_entries = analyzer.run_analysis()
|
||
|
||
if results is None:
|
||
logger.error("分析失败")
|
||
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="分析失败")
|
||
|
||
# 准备返回数据(尽量与 v1 保持一致)
|
||
response_data = {
|
||
"success": True,
|
||
"meta": {
|
||
"filename": filename_for_meta,
|
||
"task_description": request.task_description,
|
||
"language": lang_key,
|
||
"generate_plots": generate_plots,
|
||
"created_at": datetime.now().isoformat(),
|
||
"version": "v2",
|
||
"source": {
|
||
"type": "oss_url",
|
||
"host": downloaded.source_host,
|
||
"name": filename_for_meta,
|
||
"etag": downloaded.etag,
|
||
"last_modified": downloaded.last_modified,
|
||
},
|
||
},
|
||
"analysis": {
|
||
lang_key: {
|
||
"pdf_filename": None,
|
||
"ppt_filename": None,
|
||
"data_description": results.get("data_description"),
|
||
"preprocessing_steps": results.get("preprocessing_steps", []),
|
||
"api_analysis": results.get("api_analysis", {}),
|
||
"steps": results.get("steps", []),
|
||
"charts": results.get("charts", {}),
|
||
}
|
||
},
|
||
"images": {},
|
||
"log": log_entries[-20:] if log_entries else [],
|
||
"original_image": None,
|
||
}
|
||
|
||
# 兼容旧前端:始终提供 analysis.zh
|
||
if lang_key != "zh":
|
||
response_data["analysis"]["zh"] = response_data["analysis"][lang_key]
|
||
|
||
analysis_bucket = response_data["analysis"][lang_key]
|
||
|
||
# 确保不暴露本地路径,steps chart 引用即可
|
||
steps = analysis_bucket.get("steps")
|
||
if isinstance(steps, list):
|
||
for step in steps:
|
||
if isinstance(step, dict) and "image_path" in step:
|
||
step.pop("image_path", None)
|
||
|
||
# images 保留为空兼容旧前端
|
||
response_data["images"] = {}
|
||
|
||
logger.info("分析完成 (v2)")
|
||
return response_data
|
||
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
logger.error(f"分析异常 (v2): {str(e)}", exc_info=True)
|
||
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
|
||
|
||
finally:
|
||
# 清理临时文件
|
||
if downloaded is not None:
|
||
try:
|
||
os.unlink(downloaded.local_path)
|
||
except Exception:
|
||
pass
|