概述

Python 生态系统为时间序列分析提供了丰富的工具库,从传统统计方法到现代深度学习模型都有成熟的实现。本章将详细介绍主流工具库的功能特点、API 使用方法和选择建议。


一、主流工具库概览

1.1 功能对比矩阵

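| 库名 | GitHub Stars | 核心特点 | 适用场景 | 学习曲线 |
| --- | --- | --- | --- | --- |
| Nixtla/StatsForecast | 5k+ | 高性能统计模型 | 生产级预测 | 中等 |
| Darts | 8k+ | 统一接口 | 快速原型 | — |
| SKtime | 7.8k | sklearn兼容 | 分类/回归 | 中等 |
| Prophet | 16k+ | 业务友好 | 业务预测 | — |
| NeuralForecast | 1.5k+ | 深度学习 | 大规模预测 | 中等 |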

1.2 选择决策树

graph TD
    A[开始] --> B{数据规模?}
    B -->|小于1万| C{需要深度学习?}
    C -->|否| D[Prophet / Darts]
    C -->|是| E[Darts / NeuralForecast]
    B -->|1万-100万| F{需要统计模型?}
    F -->|是| G[StatsForecast]
    F -->|否| H[NeuralForecast]
    B -->|大于100万| I[自定义 / GPU加速]

二、Nixtla 生态

2.1 StatsForecast

StatsForecast 提供高性能的时间序列统计预测模型:

# Installation
# pip install statsforecast

import numpy as np
import pandas as pd
from statsforecast import StatsForecast
from statsforecast.models import (
    AutoARIMA, AutoETS, AutoTheta,
    DynamicOptimizedTheta, SeasonalExponentialSmoothing,
    MSTL, CrostonClassic, HistoricAverage
)

# Prepare the data.
# Expected long format: one row per (unique_id, ds) with the target in y.
df = pd.DataFrame({
    'unique_id': ['series_1'] * 100,
    'ds': pd.date_range('2023-01-01', periods=100, freq='D'),
    'y': np.sin(np.arange(100) * 0.1) + np.random.randn(100) * 0.1
})

# Candidate models, all configured with a weekly season on daily data.
models = [
    AutoARIMA(season_length=7),
    AutoETS(season_length=7),
    AutoTheta(season_length=7),
    DynamicOptimizedTheta(season_length=7),
    MSTL(season_length=[7, 30]),  # multiple seasonalities
    SeasonalExponentialSmoothing(season_length=7, alpha=0.1),
]

# Create the StatsForecast instance.
sf = StatsForecast(
    models=models,
    freq='D',
    n_jobs=-1  # run in parallel
)

# Fit and forecast the next 14 days.
# `h` is the first positional parameter of `forecast`, so the frame must be
# passed by keyword; `sf.forecast(df, h=14)` would bind `df` to `h`.
forecasts = sf.forecast(df=df, h=14)
print(forecasts.head())

2.2 NeuralForecast

NeuralForecast 提供深度学习预测模型:

# pip install neuralforecast

import torch
import numpy as np
import pandas as pd
from neuralforecast import NeuralForecast
from neuralforecast.models import (
    NBEATS, NHITS, TFT, VanillaTransformer,
    StemGNN, Informer, Autoformer
)
# The Huber loss class is exported as `HuberLoss` (there is no `Huber`).
from neuralforecast.losses.pytorch import MSE, HuberLoss

# Prepare the data.
# Expected columns: unique_id, ds, y, plus optional exogenous variables.
df = pd.DataFrame({
    'unique_id': ['series_1'] * 200,
    'ds': pd.date_range('2023-01-01', periods=200, freq='H'),
    'y': np.sin(np.arange(200) * 0.1) + 0.5 * np.random.randn(200),
    # Optional exogenous variable; the models below do not list it in an
    # *_exog_list argument.
    'exog_1': np.random.randn(200),
})

# Build the models: 48 hours of history in, 24 hours of forecast out.
# Training length is set with `max_steps`, the parameter documented on the
# models. TFT's attention heads and hidden width are `n_head` and
# `hidden_size`.
models = [
    NBEATS(input_size=48, h=24, max_steps=50),
    NHITS(input_size=48, h=24, max_steps=50),
    TFT(input_size=48, h=24, max_steps=50,
        n_head=4, hidden_size=128),
]

# Create the NeuralForecast wrapper.
nf = NeuralForecast(
    models=models,
    freq='H'
)

# Fit on the history and forecast the next `h` steps.
nf.fit(df)
forecasts = nf.predict()
print(forecasts.head())

2.3 自动模型选择

from statsforecast import StatsForecast
from statsforecast.models import AutoARIMA, AutoETS, AutoTheta
from statsforecast.utils import generate_series

# Generate example data (10 synthetic daily series).
series = generate_series(n_series=10, freq='D')

# Candidate models to compare. StatsForecast has no `AutoEnsemble` class;
# selection is done by scoring each candidate with cross-validation.
models = [
    AutoARIMA(season_length=7),
    AutoETS(season_length=7),
    AutoTheta(season_length=7),
]
sf = StatsForecast(models=models, freq='D', n_jobs=-1)

# Rolling-origin evaluation. `cross_validation` is a method of the
# StatsForecast instance, not a function in `statsforecast.utils`.
cv_df = sf.cross_validation(
    df=series,
    h=7,  # forecast horizon
    step_size=7,  # step between consecutive windows
    n_windows=3  # number of windows
)

# The result holds the actuals in `y` and one prediction column per model,
# so the mean absolute error per model is a column-wise reduction.
model_cols = ['AutoARIMA', 'AutoETS', 'AutoTheta']
mae_by_model = cv_df[model_cols].sub(cv_df['y'], axis=0).abs().mean()

print(mae_by_model.sort_values())
print(f"best model: {mae_by_model.idxmin()}")

三、Darts

3.1 统一 API

Darts 提供了统一的接口来使用多种时间序列模型:

# pip install darts
 
import pandas as pd
import numpy as np
from darts import TimeSeries
from darts.models import (
    # Statistical models
    ARIMA, ExponentialSmoothing, Prophet,
    # Deep learning models
    RNNModel, TransformerModel, NBEATSModel,
    # Others
    Theta, FFT
)
 
# Build a TimeSeries object.
# Option 1: from a DataFrame with a time column and a value column.
df = pd.DataFrame({
    'date': pd.date_range('2023-01-01', periods=365, freq='D'),
    'value': np.sin(np.arange(365) * 2 * np.pi / 52) + np.random.randn(365) * 0.1
})
ts = TimeSeries.from_dataframe(df, time_col='date', value_cols='value')
 
# Option 2: from a time index and an array of values.
# Note: this rebinds `ts`, replacing the series built in option 1.
ts = TimeSeries.from_times_and_values(
    times=pd.date_range('2023-01-01', periods=365, freq='D'),
    values=np.sin(np.arange(365) * 0.1)
)
 
# Split into training and validation parts at 2023-09-01.
train, val = ts.split_after(pd.Timestamp('2023-09-01'))

3.2 统计模型

from darts.models import ARIMA, ExponentialSmoothing, Theta
 
# Each model below is fitted on `train` (built in the previous snippet) and
# then asked for a 30-step forecast.

# ARIMA with a seasonal component of period 7
model_arima = ARIMA(p=5, d=1, q=2, seasonal_order=(1, 1, 1, 7))
model_arima.fit(train)
prediction_arima = model_arima.predict(n=30)
 
# Exponential smoothing with a seasonal period of 7
model_ets = ExponentialSmoothing(seasonal_periods=7)
model_ets.fit(train)
prediction_ets = model_ets.predict(n=30)
 
# Theta
model_theta = Theta(theta=2)
model_theta.fit(train)
prediction_theta = model_theta.predict(n=30)

3.3 深度学习模型

from darts.models import NBEATSModel, TransformerModel, RNNModel
from darts.dataprocessing.transformers import Scaler

# Normalise the training series; the fitted scaler is reused to map
# predictions back to the original scale.
scaler = Scaler()
train_scaled = scaler.fit_transform(train)

# N-BEATS: 30 steps of history in, 7 steps out per forward pass.
model_nbeats = NBEATSModel(
    input_chunk_length=30,
    output_chunk_length=7,
    n_epochs=50,
    random_state=42
)
model_nbeats.fit(train_scaled)

# Forecast 14 steps and undo the scaling.
pred_nbeats = model_nbeats.predict(n=14)
pred_nbeats_unscaled = scaler.inverse_transform(pred_nbeats)

# Transformer
model_transformer = TransformerModel(
    input_chunk_length=30,
    output_chunk_length=7,
    d_model=128,
    nhead=4,
    num_encoder_layers=2,
    num_decoder_layers=2,
    dim_feedforward=256,
    n_epochs=50
)
model_transformer.fit(train_scaled)

# RNN/LSTM
# RNNModel has no configurable `output_chunk_length`. The length of the
# training sequences is set with `training_length`, which per the Darts
# docs should be larger than `input_chunk_length` (the default of 24 is
# shorter than the 30 used here).
model_rnn = RNNModel(
    input_chunk_length=30,
    training_length=37,
    model='LSTM',
    hidden_dim=32,
    n_rnn_layers=2,
    n_epochs=50,
    random_state=42
)
model_rnn.fit(train_scaled)

3.4 多序列处理

# Build several series
from darts import TimeSeries
 
# Option 1: from a wide DataFrame (one column per series).
# No value_cols is given, so the non-time columns become the components of
# a single multivariate TimeSeries.
df_multi = pd.DataFrame({
    'date': pd.date_range('2023-01-01', periods=100, freq='D'),
    'series_1': np.sin(np.arange(100) * 0.1),
    'series_2': np.cos(np.arange(100) * 0.1),
    'series_3': np.random.randn(100)
})
ts_multi = TimeSeries.from_dataframe(df_multi, time_col='date')
 
# Option 2: from a long DataFrame (one row per series id and timestamp),
# grouped by `series_id`.
df_long = pd.DataFrame({
    'series_id': ['A'] * 100 + ['B'] * 100,
    'date': list(pd.date_range('2023-01-01', periods=100, freq='D')) * 2,
    'value': list(np.sin(np.arange(100) * 0.1)) + list(np.cos(np.arange(100) * 0.1))
})
ts_list = TimeSeries.from_group_dataframe(
    df_long, 
    group_cols='series_id',
    time_col='date',
    value_cols='value'
)
 
# Train one global model on the grouped series.
model_global = NBEATSModel(
    input_chunk_length=30,
    output_chunk_length=7,
    n_epochs=50
)
model_global.fit(ts_list)
 
# Batch prediction: forecast 14 steps for the series passed in.
predictions = model_global.predict(n=14, series=ts_list)

四、SKtime

4.1 sklearn 兼容 API

SKtime 提供了与 sklearn 兼容的时间序列接口:

# pip install sktime

import numpy as np
import pandas as pd
from sktime.datasets import load_airline
# Forecasters are imported from their own modules: `sktime.forecasting.all`
# does not export `Theta`, `DetrendTransformer`, `pdq_auto_grid` or
# `temporal_importance`.
from sktime.forecasting.arima import ARIMA, AutoARIMA
from sktime.forecasting.ets import AutoETS
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.theta import ThetaForecaster
# Transformers
from sktime.transformations.series.boxcox import BoxCoxTransformer
from sktime.transformations.series.detrend import Detrender
# Tuning and splitting utilities
from sktime.forecasting.model_selection import ForecastingGridSearchCV
from sktime.split import ExpandingWindowSplitter, temporal_train_test_split
from sktime.performance_metrics.forecasting import (
    mean_absolute_error,
    mean_squared_error,
    mean_absolute_percentage_error
)

# Load a univariate forecasting dataset (monthly airline passengers).
# `load_unit_test` is a classification dataset and returns (X, y), so it
# cannot be used as a forecasting target.
y = load_airline()
print(f"目标变量形状: {y.shape}")

# Hold out the last 26 observations for testing.
y_train, y_test = temporal_train_test_split(y, test_size=26)

# Basic usage: fit, then predict the steps listed in the horizon.
forecaster = ARIMA(order=(1, 1, 0))
forecaster.fit(y_train)
y_pred = forecaster.predict(fh=[1, 2, 3, 4])  # forecast the next 4 steps

4.2 自动参数调优

from sktime.forecasting.arima import ARIMA
from sktime.forecasting.model_selection import ForecastingGridSearchCV
from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError
from sktime.split import ExpandingWindowSplitter

# Cross-validation strategy: expanding training window, 12-step horizon.
cv = ExpandingWindowSplitter(
    initial_window=100,
    step_length=20,
    fh=np.arange(1, 13)  # forecast 12 steps
)

# Grid search over the non-seasonal ARIMA order.
# The tuned parameter is `order=(p, d, q)` on ARIMA. `AutoARIMA` runs its
# own search and has no `p`/`q` parameters, so it cannot be tuned with this
# grid. For seasonal data, also set `seasonal_order=(P, D, Q, sp)`.
param_grid = {
    'order': [
        (p, d, q)
        for p in (1, 2, 3)
        for d in (0, 1)
        for q in (0, 1, 2)
    ]
}

forecaster = ARIMA()

# Scoring is given as an sktime metric object. Parallelism is left at the
# library default, because the keyword that controls it differs between
# sktime releases.
grid_search = ForecastingGridSearchCV(
    forecaster=forecaster,
    param_grid=param_grid,
    cv=cv,
    scoring=MeanAbsolutePercentageError(symmetric=False)
)

grid_search.fit(y_train)
print(f"最佳参数: {grid_search.best_params_}")
print(f"最佳分数: {grid_search.best_score_}")

# Predict with the best configuration found.
y_pred = grid_search.predict(fh=[1, 2, 3])

4.3 管道构建

from sktime.forecasting.arima import ARIMA
from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.transformations.series.boxcox import BoxCoxTransformer
from sktime.transformations.series.detrend import Detrender

# Build the pipeline.
# TransformedTargetForecaster applies the transformers to the target `y`
# before fitting the final forecaster (ForecastingPipeline, which lives in
# `sktime.forecasting.compose`, transforms the exogenous `X` instead).
# `Detrender()` with default arguments removes a linear trend.
pipeline = TransformedTargetForecaster(steps=[
    ('boxcox', BoxCoxTransformer()),
    ('detrend', Detrender()),
    ('forecast', ARIMA())
])

# Fit and forecast.
pipeline.fit(y_train)
y_pred = pipeline.predict(fh=[1, 2, 3])

# The pipeline inverts the target transformations inside `predict`, so the
# forecast is already on the original scale; inverting again would distort it.
y_pred_original = y_pred

4.4 时间序列分类

from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sktime.classification.shapelet_based import ShapeletTransformClassifier
from sktime.datasets import load_arrow_head
 
# Load the classification dataset (separate train and test splits).
# Note: this rebinds y_train / y_test used by the forecasting snippets.
X_train, y_train = load_arrow_head(split='train')
X_test, y_test = load_arrow_head(split='test')
 
# Nearest-neighbour classifier using the DTW distance
clf = KNeighborsTimeSeriesClassifier(distance='dtw')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
 
# Evaluate accuracy on the test split
from sklearn.metrics import accuracy_score
print(f"准确率: {accuracy_score(y_test, y_pred):.2%}")

五、Prophet

5.1 基础使用

Prophet 是 Facebook 开发的业务友好型预测库:

# pip install prophet
 
import pandas as pd
import numpy as np
from prophet import Prophet
 
# Prepare the data (Prophet expects the columns `ds` and `y`).
df = pd.DataFrame({
    'ds': pd.date_range('2023-01-01', periods=365, freq='D'),
    'y': np.sin(np.arange(365) * 2 * np.pi / 52) + np.random.randn(365) * 0.1
})
 
# Create the model
model = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    seasonality_mode='multiplicative'  # or 'additive'
)
 
# Fit
model.fit(df)
 
# Build the frame of dates to predict, extended by 30 periods.
future = model.make_future_dataframe(periods=30)
print(future.tail())
 
# Predict; show the point forecast and its uncertainty interval.
forecast = model.predict(future)
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())
 
# Plot the forecast components
model.plot_components(forecast)

5.2 高级配置

from prophet import Prophet
 
# The snippets below are independent configuration examples: each one
# rebinds `model` to a new, unfitted Prophet instance.

# Custom seasonalities
model = Prophet().add_seasonality(
    name='monthly',
    period=30.5,
    fourier_order=5
).add_seasonality(
    name='quarterly',
    period=91.25,
    fourier_order=10
)
 
# Holiday effects
holidays = pd.DataFrame({
    'holiday': 'company_event',
    'ds': pd.to_datetime(['2023-03-15', '2023-08-01']),
    'lower_window': 0,
    'upper_window': 1,
})
model = Prophet(holidays=holidays)
 
# Extra regressor (exogenous variable); the column is added to `df` here.
df['special_event'] = np.random.randint(0, 2, len(df))
model = Prophet()
model.add_regressor('special_event')
 
# Trend changepoint configuration
model = Prophet(
    n_changepoints=25,
    changepoint_prior_scale=0.5,  # trend flexibility
)

5.3 交叉验证

import itertools

import pandas as pd
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics

# `cross_validation` requires a fitted model. Fit it here if the `model`
# carried over from the previous snippet has not been fitted yet (a Prophet
# object can only be fitted once, hence the guard).
if model.history is None:
    model.fit(df)

# Cross-validation
cv_results = cross_validation(
    model,
    initial='180 days',  # size of the first training window
    period='30 days',    # spacing between consecutive cutoff dates
    horizon='30 days'    # forecast horizon evaluated after each cutoff
)

# Compute performance metrics
df_metrics = performance_metrics(cv_results)
print(df_metrics[['horizon', 'mape', 'mae', 'rmse']].head())

# Hyperparameter tuning.
# `prophet.diagnostics` has no `grid_search` helper; the documented approach
# is to loop over the parameter combinations and cross-validate each one.
param_grid = {
    'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
}

all_params = [
    dict(zip(param_grid.keys(), values))
    for values in itertools.product(*param_grid.values())
]

mapes = []
for params in all_params:
    candidate = Prophet(**params).fit(df)
    candidate_cv = cross_validation(
        candidate,
        initial='180 days',
        period='30 days',
        horizon='30 days'
    )
    # rolling_window=1 aggregates the metric over the whole horizon,
    # giving a single row per parameter combination.
    candidate_metrics = performance_metrics(candidate_cv, rolling_window=1)
    mapes.append(candidate_metrics['mape'].values[0])

grid_search_results = pd.DataFrame(all_params)
grid_search_results['mape'] = mapes

print(grid_search_results)

六、工具选择指南

6.1 按任务类型选择

| 任务类型 | 推荐工具 | 原因 |
| --- | --- | --- |
| 快速原型 | Darts, Prophet | API 简洁,文档友好 |
| 生产部署 | StatsForecast, NeuralForecast | 高性能,支持并行 |
| 学术研究 | SKtime | 方法全面,可扩展 |
| 业务分析 | Prophet | 自动节假日处理 |
| 深度学习 | NeuralForecast, Darts | 支持最新模型 |

6.2 性能对比

# 性能基准测试示例
import time
import numpy as np
import pandas as pd
 
def benchmark_forecast(data, horizon, models):
    """Time fit-plus-predict for each model and report the outcome.

    Args:
        data: Training data, passed unchanged to each model's ``fit``.
        horizon: Forecast horizon, passed unchanged to each model's
            ``predict``.
        models: Mapping of display name to a model object exposing
            ``fit(data)`` and ``predict(horizon)``.

    Returns:
        A dict keyed by model name. Each value is
        ``{'time': <seconds as float>, 'status': 'success'}`` on success, or
        ``{'time': None, 'status': 'error: <message>'}`` if the model raised.
    """
    results = {}

    for name, model in models.items():
        # perf_counter is monotonic, so the measurement is not affected by
        # system clock adjustments.
        start = time.perf_counter()
        try:
            model.fit(data)
            model.predict(horizon)
        except Exception as e:
            # Deliberate best effort: one failing model must not abort the
            # whole benchmark, so the error is recorded instead of raised.
            results[name] = {'time': None, 'status': f'error: {e}'}
        else:
            elapsed = time.perf_counter() - start
            results[name] = {'time': elapsed, 'status': 'success'}

    return results
 
# Example: benchmark three Darts models, whose `fit(series)` /
# `predict(n)` interface matches what benchmark_forecast calls.
from darts import TimeSeries
from darts.models import ARIMA, ExponentialSmoothing, Theta

# Darts models need a TimeSeries rather than a raw array. The series is
# shifted to a positive level because Theta rejects a near-zero mean.
data = TimeSeries.from_values(10 + np.random.randn(1000))
models = {
    'ARIMA': ARIMA(p=5, d=1, q=0),
    'ETS': ExponentialSmoothing(),
    'Theta': Theta(),
}

results = benchmark_forecast(data, 30, models)
for name, res in results.items():
    print(f"{name}: {res}")

6.3 代码示例:完整流程

"""
End-to-end time series forecasting workflow.
"""

import pandas as pd
import numpy as np
from darts import TimeSeries
from darts.models import ARIMA, ExponentialSmoothing, NBEATSModel
from darts.metrics import mae, mse, rmse
from darts.dataprocessing.transformers import Scaler

# 1. Load the data (expects a `date` column and a numeric `value` column).
df = pd.read_csv('your_data.csv', parse_dates=['date'])

# 2. Explore the data
df.info()  # info() prints its report itself and returns None
print(df.describe())

# 3. Pre-processing
# Flag outliers with the IQR rule.
Q1 = df['value'].quantile(0.25)
Q3 = df['value'].quantile(0.75)
IQR = Q3 - Q1
is_outlier = (
    (df['value'] < Q1 - 1.5 * IQR) |
    (df['value'] > Q3 + 1.5 * IQR)
)

# Blank out the outliers and fill them, together with the original missing
# values, by linear interpolation. Dropping the rows instead would leave
# gaps in the time index, and TimeSeries.from_dataframe could then no longer
# infer a regular frequency. Only the numeric target is interpolated.
df['value'] = (
    df['value']
    .mask(is_outlier)
    .interpolate(method='linear', limit_direction='both')
)

# 4. Build the time series object
ts = TimeSeries.from_dataframe(
    df,
    time_col='date',
    value_cols='value'
)

# 5. Train/test split
train, test = ts.split_after(pd.Timestamp('2023-10-01'))

# 6. Model training
models = {
    'ARIMA': ARIMA(p=5, d=1, q=2, seasonal_order=(1, 1, 1, 7)),
    'ETS': ExponentialSmoothing(seasonal_periods=7),
}

forecasts = {}
for name, model in models.items():
    model.fit(train)
    # Forecast exactly as many steps as the test set contains.
    forecasts[name] = model.predict(len(test))

# 7. Evaluation
for name, forecast in forecasts.items():
    print(f"\n{name}:")
    print(f"  MAE: {mae(test, forecast):.4f}")
    print(f"  MSE: {mse(test, forecast):.4f}")
    print(f"  RMSE: {rmse(test, forecast):.4f}")

# 8. Visualisation
import matplotlib.pyplot as plt

plt.figure(figsize=(14, 6))
train.plot(label='Train')
test.plot(label='Test')
for name, forecast in forecasts.items():
    forecast.plot(label=name)
plt.legend()
plt.title('Time Series Forecast Comparison')
plt.show()

七、参考链接

官方文档

安装命令

# Statistical models
pip install statsforecast
 
# Deep learning models
pip install neuralforecast
 
# Unified interface
pip install darts
 
# scikit-learn compatible API
pip install sktime
 
# Business forecasting
pip install prophet

相关阅读