输入变换防御
引言
**输入变换防御(Input Transformation Defense)**是一类轻量级对抗防御方法,通过在模型前添加预处理步骤来消除或减少对抗扰动。[^1]
方法分类
| 类型 | 方法 | 原理 |
|---|---|---|
| 压缩类 | JPEG压缩、位深度减少 | 丢弃高频成分 |
| 随机化 | 随机填充、随机调整大小 | 破坏攻击模式 |
| 特征变换 | 图像滤波、锐化 | 改变特征空间 |
| 重建类 | 去噪、修复 | 恢复原始分布 |
图像压缩防御
JPEG压缩
利用JPEG压缩丢弃高频扰动成分:
from PIL import Image
import io
def jpeg_compress(x, quality=75):
    """
    JPEG compression defense.

    JPEG naturally discards high-frequency content, which is where
    adversarial perturbations tend to concentrate.

    Args:
        x: input image, either a torch.Tensor (CHW, assumed in [0, 1] —
           ToPILImage expects that range) or a PIL image.
        quality: JPEG quality factor (lower = stronger compression).

    Returns:
        The decompressed image as a torch.Tensor in [0, 1].
    """
    # Normalize the input to a PIL image.
    img = transforms.ToPILImage()(x.cpu()) if isinstance(x, torch.Tensor) else x
    # Round-trip through an in-memory JPEG encode/decode.
    stream = io.BytesIO()
    img.save(stream, format='JPEG', quality=quality)
    stream.seek(0)
    decoded = Image.open(stream)
    return transforms.ToTensor()(decoded)

位深度减少
减少颜色精度以平滑扰动:
def bit_depth_reduction(x, n_bits=4):
"""
位深度减少防御。
"""
# 量化到n_bits位
scale = 2 ** n_bits
x_quantized = torch.floor(x * scale) / scale
return x_quantized组合压缩
class CombinedCompression:
def __init__(self, jpeg_quality=60, bit_depth=3):
self.jpeg_quality = jpeg_quality
self.bit_depth = bit_depth
def __call__(self, x):
# 1. 位深度减少
x = self.bit_depth_reduction(x, self.bit_depth)
# 2. JPEG压缩
x = self.jpeg_compress(x, self.jpeg_quality)
return x
def bit_depth_reduction(self, x, n_bits):
scale = 2 ** n_bits
return torch.floor(x * scale) / scale
def jpeg_compress(self, x, quality):
# PIL实现
...随机化防御
Random Padding / Resizing
class RandomResizing:
"""
随机调整大小填充防御。
"""
def __init__(self, scales=[0.9, 1.0, 1.1], padding_mode='edge'):
self.scales = scales
self.padding_mode = padding_mode
def __call__(self, x):
# 随机缩放
scale = random.choice(self.scales)
h, w = x.shape[-2:]
new_h, new_w = int(h * scale), int(w * scale)
x_resized = F.interpolate(
x, size=(new_h, new_w),
mode='bilinear', align_corners=False
)
# 填充回原始大小
pad_h = h - new_h
pad_w = w - new_w
if pad_h > 0 or pad_w > 0:
x_resized = F.pad(
x_resized,
[pad_w//2, pad_w - pad_w//2, pad_h//2, pad_h - pad_h//2],
mode=self.padding_mode
)
return x_resizedRandom Padding
def random_padding(x, pad_size=4, padding_mode='constant'):
"""
随机填充防御。
"""
# 随机填充大小
pad_h = random.randint(0, pad_size)
pad_w = random.randint(0, pad_size)
return F.pad(
x,
[pad_w, pad_w, pad_h, pad_h],
mode=padding_mode
)随机化集成
class RandomizedDefense:
    """
    Randomized defense ensemble.

    Averages model predictions over several randomly chosen input
    transforms, so an attacker cannot rely on a single deterministic
    preprocessing pipeline.
    """
    def __init__(self, model, n_transforms=8):
        self.model = model
        self.n_transforms = n_transforms
        # Pool of candidate transforms; one is sampled per prediction.
        # NOTE(review): RandomPadding / GaussianDenoising / BitDepthReduction
        # classes are not defined in this file — confirm they are imported
        # elsewhere (only function forms appear above).
        self.transforms = [
            RandomResizing(scales=[0.95]),
            RandomResizing(scales=[1.05]),
            RandomPadding(pad_size=4),
            GaussianDenoising(sigma=0.1),
            BitDepthReduction(n_bits=4),
        ]

    def predict(self, x):
        """Average model outputs over n_transforms random transforms of x."""
        outputs = [
            self.model(random.choice(self.transforms)(x))
            for _ in range(self.n_transforms)
        ]
        return torch.stack(outputs).mean(dim=0)

滤波防御
高斯滤波
def gaussian_filter(x, kernel_size=5, sigma=1.0):
"""
高斯滤波防御。
"""
if isinstance(x, torch.Tensor):
# 创建高斯核
kernel = self._create_gaussian_kernel(
kernel_size, sigma, x.device
)
# 应用滤波
return F.conv2d(
x, kernel, padding=kernel_size//2
)
return x
def _create_gaussian_kernel(self, size, sigma, device):
ax = torch.arange(-size // 2 + 1., size // 2 + 1., device=device)
xx, yy = torch.meshgrid(ax, ax, indexing='ij')
kernel = torch.exp(-(xx**2 + yy**2) / (2. * sigma**2))
kernel = kernel / kernel.sum()
return kernel.view(1, 1, size, size)中值滤波
def median_filter(x, kernel_size=3):
"""
中值滤波防御。
"""
if len(x.shape) == 4: # [B, C, H, W]
B, C, H, W = x.shape
# 使用torch实现(简化版)
unfolded = F.unfold(x, kernel_size, padding=kernel_size//2)
unfolded = unfolded.transpose(1, 2) # [B, H*W, C*k*k]
# 计算中位数
median = unfolded.median(dim=-1)[0] # [B, H*W]
median = median.view(B, 1, H, W)
return median
return x去噪防御
图像修复网络
class DenoiseDefense(nn.Module):
    """
    Denoising-network defense: pass inputs through a trainable U-Net
    denoiser before classification.
    """
    def __init__(self):
        super().__init__()
        # NOTE(review): UNet is not defined in this file — presumably
        # imported elsewhere; confirm.
        self.denoiser = UNet(in_channels=3, out_channels=3)

    def forward(self, x):
        """Return the denoised version of x."""
        return self.denoiser(x)
# Pre-trained denoiser
class PreTrainedDenoiser:
    """
    Wraps a pretrained DnCNN model as a callable input-denoising defense.
    """
    def __init__(self):
        # NOTE(review): load_pretrained_model is not defined in this file —
        # presumably imported elsewhere; confirm.
        self.denoiser = load_pretrained_model('DnCNN')

    def __call__(self, x):
        """Denoise x without tracking gradients (inference only)."""
        with torch.no_grad():
            return self.denoiser(x)

TV正则化
全变差(Total Variation)正则化可以平滑噪声同时保留边缘:
def tv_denoise(x, lambda_tv=0.1, iterations=100):
"""
TV去噪防御。
"""
x_denoised = x.clone()
for _ in range(iterations):
# 计算梯度
grad_x = x_denoised[:, :, 1:, :] - x_denoised[:, :, :-1, :]
grad_y = x_denoised[:, :, :, 1:] - x_denoised[:, :, :, :-1]
# TV损失
tv_loss = grad_x.abs().mean() + grad_y.abs().mean()
# 反向传播
tv_loss.backward()
# 更新
with torch.no_grad():
x_denoised -= lambda_tv * x_denoised.grad
x_denoised.grad.zero_()
return x_denoised组合防御
随机防御堆栈
class DefenseStack(nn.Module):
"""
组合多种防御方法。
"""
def __init__(self, model):
super().__init__()
self.model = model
self.defenses = nn.ModuleList([
RandomResizing(),
GaussianFilter(kernel_size=3),
BitDepthReduction(n_bits=4),
JPEGCompressor(quality=70),
])
def forward(self, x):
# 随机选择防御
defense = random.choice(self.defenses)
x_defended = defense(x)
return self.model(x_defended)输入净化(Input Purification)
class InputPurificationDefense(nn.Module):
"""
输入净化:训练一个净化器恢复干净样本。
"""
def __init__(self, purifier, classifier):
super().__init__()
self.purifier = purifier
self.classifier = classifier
def forward(self, x):
# 净化输入
x_purified = self.purifier(x)
# 分类
return self.classifier(x_purified)对抗样本检测
局部内在维度检测
def detect_adversarial_by_lid(x, model, k=20):
    """
    Local Intrinsic Dimensionality (LID) based adversarial detection.

    Estimates a per-sample LID in the model's feature space and flags
    samples whose LID falls below an empirical threshold.

    Returns:
        Boolean tensor of shape [batch_size], True where a sample is
        flagged as adversarial.
    """
    # NOTE(review): extract_features / estimate_lid are not defined in this
    # file — presumably imported elsewhere; confirm.
    lid_scores = []
    for sample in torch.split(x, 1, dim=0):
        # Feature extraction is inference-only; no gradients needed.
        with torch.no_grad():
            feats = extract_features(model, sample)
        # LID estimate from the k nearest neighbors in feature space.
        lid_scores.append(estimate_lid(feats, k))
    lids = torch.tensor(lid_scores)
    threshold = 10.0  # empirical threshold
    return lids < threshold

方法比较
| 方法 | 防御效果 | 计算开销 | 精度损失 |
|---|---|---|---|
| JPEG压缩 | 中等 | 低 | 小 |
| 位深度减少 | 低-中 | 极低 | 极小 |
| 随机化 | 中等 | 低 | 小 |
| 高斯滤波 | 中等 | 低 | 极小 |
| 去噪网络 | 高 | 高 | 中等 |
| TV去噪 | 中等 | 高 | 小 |
局限性
适应性攻击
攻击者可以针对变换进行适应:
# 适应性攻击示例
def adaptive_attack(model, x, y, defense, epsilon=0.03):
"""
适应输入变换的对抗攻击。
"""
x_adv = x.clone()
for _ in range(10):
x_adv.requires_grad = True
# 应用防御
x_defended = defense(x_adv)
# 计算损失
loss = F.cross_entropy(model(x_defended), y)
# 反向传播
loss.backward()
with torch.no_grad():
x_adv = x_adv + 0.01 * x_adv.grad.sign()
x_adv = torch.clamp(x_adv, x - epsilon, x + epsilon)
return x_adv变换不变攻击
攻击者可以绕过输入变换:
# 生成变换不变对抗样本
def transformationInvariantAttack(model, x, y, defenses):
"""
生成对多种变换鲁棒的对抗样本。
"""
x_adv = x.clone()
for _ in range(20):
x_adv.requires_grad = True
# 多个变换的集成损失
loss = 0
for defense in defenses:
x_defended = defense(x_adv)
loss += F.cross_entropy(model(x_defended), y)
loss /= len(defenses)
loss.backward()
with torch.no_grad():
x_adv = x_adv + 0.01 * x_adv.grad.sign()
return x_adv本章小结
输入变换防御是一类实用的轻量级防御方法:
- 图像压缩:JPEG压缩、位深度减少丢弃高频扰动
- 随机化:破坏攻击模式的确定性
- 滤波:高斯滤波、中值滤波平滑噪声
- 去噪:TV去噪、神经网络去噪
- 组合防御:集成多种方法提高鲁棒性
- 局限性:适应性攻击可能绕过
输入变换防御可以作为对抗训练等强防御的补充。
参考文献
Footnotes
-
Xu, W., Evans, D., & Qi, Y. (2017). Feature Squeezing: Detecting Adversarial Examples in Deep Neural Networks. NDSS 2018. ↩