Support us and view this ad

可选:点击以支持我们的网站

免费文章

"""Driving-behaviour risk prediction model - Att-BiLSTM implementation.

A complete PyTorch script that adapts Att-BiLSTM to driving-behaviour data.
It covers the whole pipeline: synthetic data generation, model definition,
training and evaluation. The code has been adjusted for multi-feature
driving time series and is commented so that the synthetic data can be
replaced with real recordings (e.g. CSV files collected from a Garmin
watch + OBD device).

Data format: time series, each sample has shape (timesteps, features).
Task: binary classification (high risk / low risk) or regression (risk
probability).
"""

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset


def set_seed(seed=42):
    """Seed NumPy and PyTorch (CPU and all CUDA devices) for reproducibility."""
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


set_seed(42)

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


# ==================== 1. Data preparation ====================

def generate_synthetic_driving_data(num_samples=10000, timesteps=60, num_features=20,
                                    risk_feature_idx=4, risk_threshold=None):
    """Generate synthetic driving-behaviour data.

    Real deployments should load the data from CSV or a database instead
    (see ``load_real_data``).

    Args:
        num_samples: Number of sequences to generate.
        timesteps: Length of every sequence.
        num_features: Number of features per time step.
        risk_feature_idx: Index of the feature whose per-sample mean decides
            the label (default 4, i.e. the 5th feature, standing in for
            "harsh acceleration").
        risk_threshold: A sample is high risk when the mean of the risk
            feature exceeds this value. ``None`` (default) uses
            ``0.5 / sqrt(timesteps)``, i.e. half a standard error of the
            mean, which labels roughly 31% of the samples as high risk.

    Returns:
        ``(X, y)`` where ``X`` is a float32 array of shape
        ``(num_samples, timesteps, num_features)`` and ``y`` is an int64
        array of shape ``(num_samples,)`` with 1 = high risk, 0 = low risk.

    Raises:
        ValueError: If ``risk_feature_idx`` is not a valid feature index.
    """
    if not 0 <= risk_feature_idx < num_features:
        raise ValueError(
            f"risk_feature_idx={risk_feature_idx} is out of range for "
            f"num_features={num_features}"
        )

    X = np.random.randn(num_samples, timesteps, num_features).astype(np.float32)

    # Label rule: high risk (1) when the time-averaged risk feature is large.
    risk_feature_mean = X[:, :, risk_feature_idx].mean(axis=1)

    if risk_threshold is None:
        # The mean of `timesteps` i.i.d. N(0, 1) draws has standard deviation
        # 1/sqrt(timesteps). A fixed cutoff of 0.5 is ~3.9 sigma at 60 steps,
        # so practically every label would be 0 and metrics such as ROC-AUC
        # would be undefined. Scaling the cutoff keeps both classes present.
        risk_threshold = 0.5 / np.sqrt(max(timesteps, 1))

    y = (risk_feature_mean > risk_threshold).astype(np.int64)  # binary labels
    return X, y


def load_real_data(csv_path, timesteps=60, feature_cols=None, label_col='label'):
    """Load real data from a CSV file (example; adapt to the actual format).

    The CSV is expected to contain the columns ``'sample_id'``,
    ``'time_step'``, the feature columns and the label column. Rows are
    grouped by ``'sample_id'``, ordered by ``'time_step'`` and the first
    ``timesteps`` rows of each sample are kept. Samples with fewer than
    ``timesteps`` rows are skipped.

    Args:
        csv_path: Path of the CSV file.
        timesteps: Number of time steps kept per sample.
        feature_cols: Feature column names. ``None`` (default) uses every
            column except ``'sample_id'``, ``'time_step'`` and ``label_col``.
        label_col: Name of the label column; the label of a sample is taken
            from its first kept row.

    Returns:
        ``(X, y)`` where ``X`` has shape
        ``(num_samples, timesteps, num_features)`` (float32) and ``y`` has
        shape ``(num_samples,)``.

    Raises:
        ValueError: If no sample has at least ``timesteps`` rows.
    """
    df = pd.read_csv(csv_path)

    if feature_cols is None:
        # Indexing with None would raise KeyError, so infer the feature set.
        reserved = {'sample_id', 'time_step', label_col}
        feature_cols = [col for col in df.columns if col not in reserved]

    X_list, y_list = [], []
    for _, group in df.groupby('sample_id'):
        if len(group) < timesteps:
            continue  # too short to form a full window
        window = group.sort_values('time_step').iloc[:timesteps]
        X_list.append(window[feature_cols].to_numpy(dtype=np.float32))
        y_list.append(window[label_col].iloc[0])

    if not X_list:
        raise ValueError(
            f"No sample in {csv_path!r} has at least {timesteps} time steps"
        )

    return np.stack(X_list), np.array(y_list)


# Generate synthetic data
X, y = generate_synthetic_driving_data(num_samples=5000, timesteps=60, num_features=20)
print(f"数据形状: X {X.shape}, y {y.shape}")

# Split into training (70%), validation (15%) and test (15%) sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)
print(f"训练集: {X_train.shape}, 验证集: {X_val.shape}, 测试集: {X_test.shape}")


# PyTorch Dataset
class DrivingDataset(Dataset):
    """In-memory dataset of (sequence, label) pairs.

    Args:
        X: Array-like of shape (num_samples, timesteps, num_features).
        y: Array-like of shape (num_samples,) holding the labels.

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of
            samples.
    """

    def __init__(self, X, y):
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = torch.tensor(X, dtype=torch.float32)
        # long for classification; switch to float for regression
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]


train_dataset = DrivingDataset(X_train, y_train)
val_dataset = DrivingDataset(X_val, y_val)
test_dataset = DrivingDataset(X_test, y_test)

batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# ==================== 2.
# Model definition ====================

class Attention(nn.Module):
    """Additive attention pooling over the time dimension.

    A single bias-free linear layer scores every time step; the scores are
    softmax-normalised along the sequence axis and used to form a weighted
    sum of the inputs.

    Args:
        hidden_size: Size of the last dimension of the sequences passed to
            ``forward``.
    """

    def __init__(self, hidden_size):
        super().__init__()
        self.attention_weights = nn.Linear(hidden_size, 1, bias=False)

    def forward(self, lstm_outputs, mask=None):
        """Pool a sequence into one context vector per batch element.

        Args:
            lstm_outputs: Tensor of shape (batch, seq_len, hidden_size).
            mask: Optional tensor of shape (batch, seq_len); truthy entries
                mark valid time steps, falsy entries (e.g. padding) receive
                zero attention weight. ``None`` (default) attends to every
                time step. Each row should keep at least one valid step,
                otherwise the softmax over that row is undefined (NaN).

        Returns:
            ``(context, attn_weights)`` with shapes (batch, hidden_size) and
            (batch, seq_len, 1).
        """
        attn_scores = self.attention_weights(lstm_outputs)  # (batch, seq_len, 1)
        if mask is not None:
            # -inf scores become exactly zero weight after the softmax.
            hidden_steps = ~mask.bool().unsqueeze(-1)
            attn_scores = attn_scores.masked_fill(hidden_steps, float("-inf"))
        attn_weights = torch.softmax(attn_scores, dim=1)  # normalise over seq_len
        context = torch.sum(attn_weights * lstm_outputs, dim=1)  # (batch, hidden_size)
        return context, attn_weights


# NOTE(review): the AttBiLSTM definition is cut off in this excerpt (the text
# ends at "nn...."). The visible fragment is kept below, commented out,
# because it cannot be parsed or completed without the missing remainder.
#
# class AttBiLSTM(nn.Module):
#     """Att-BiLSTM model adapted to driving-behaviour time series.
#     Input: (batch, seq_len, input_size)
#     Output: (batch, num_classes), or (batch, 1) for regression
#     """
#     def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout=0.5, bidirectional=True):
#         super(AttBiLSTM, self).__init__()
#         self.hidden_size = hidden_size
#         self.num_layers = num_layers
#         self.bidirectional = bidirectional
#         # The embedding layer of the original paper is removed because the
#         # input already consists of numeric features.
#         # A linear projection can be added to adjust the dimension (optional).
#         self.input_proj = nn....   <-- truncated in the source

继续阅读完整内容

支持我们的网站,请点击查看下方广告

正在加载广告...

登录