HackTheBox Death’s Glance Challenge
https://app.hackthebox.com/challenges/740
Description
You find yourself in the possession of an ancient forbidden spell. Rumors have it that by revealing the rune originated from the spell, the mystery behind the way you perish will be unveiled and sealed!
References
https://github.com/mit-han-lab/dlg
Exploitation
#!/usr/bin/python3
import torch
import torch.nn as nn
# Fix the global RNG seed so the weight initialisation and the random dummy
# tensors created later are reproducible from run to run.
# NOTE(review): presumably 50 is the seed the challenge used when producing the
# leaked gradients, so that the rebuilt network has the same weights - confirm.
torch.manual_seed(50)
# Everything runs on the CPU; the model and dummy tensors are moved to this device.
device = "cpu"
def weights_init(m):
    """Re-initialise a module's ``weight`` and ``bias`` uniformly in [-0.5, 0.5].

    Meant to be used as ``net.apply(weights_init)``, which calls it on every
    submodule. Modules that lack these attributes, or whose attribute is
    ``None`` (for example a layer built with ``bias=False``), are left untouched.

    Args:
        m: Module to re-initialise in place.
    """
    # Weight is drawn before bias; keeping this order keeps the random stream,
    # and therefore the resulting parameters, reproducible under a fixed seed.
    for attr_name in ("weight", "bias"):
        param = getattr(m, attr_name, None)
        # ``hasattr`` alone is not enough: a bias-free layer still exposes
        # ``bias`` as an attribute, set to None.
        if param is not None:
            # In-place initialisation must not be recorded by autograd.
            with torch.no_grad():
                param.uniform_(-0.5, 0.5)
class LeNet(nn.Module):
    """Small sigmoid-activated convolutional classifier.

    Four 5x5 convolutions with 12 output channels each (the first two with
    stride 2, the last two with stride 1), every one followed by a sigmoid,
    then a single linear layer on the flattened feature map.

    With the default arguments the network takes ``(N, 1, 32, 32)`` input,
    flattens to 768 features and returns ``(N, 100)`` logits.

    Args:
        in_channels: Number of channels of the input image.
        num_classes: Number of output logits.
        image_size: Height and width of the (square) input image; used only
            to size the linear layer.
    """

    def __init__(self, in_channels=1, num_classes=100, image_size=32):
        super().__init__()
        act = nn.Sigmoid
        channels = 12
        # Layers are created in a fixed order: the order determines both the
        # random draws made during construction and the order of
        # ``parameters()``.
        self.body = nn.Sequential(
            nn.Conv2d(in_channels, channels, kernel_size=5, padding=5 // 2, stride=2),
            act(),
            nn.Conv2d(channels, channels, kernel_size=5, padding=5 // 2, stride=2),
            act(),
            nn.Conv2d(channels, channels, kernel_size=5, padding=5 // 2, stride=1),
            act(),
            nn.Conv2d(channels, channels, kernel_size=5, padding=5 // 2, stride=1),
            act(),
        )
        # Each stride-2 convolution (kernel 5, padding 2) maps a side of n
        # pixels to (n - 1) // 2 + 1; the stride-1 ones keep the size.
        side = image_size
        for _ in range(2):
            side = (side - 1) // 2 + 1
        self.fc = nn.Sequential(
            nn.Linear(channels * side * side, num_classes)
        )

    def forward(self, x):
        """Return the logits for a batch of images ``x``."""
        out = self.body(x)
        # Flatten everything except the batch dimension.
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out
# Build the network on the chosen device, then overwrite PyTorch's default
# initialisation with the uniform one from weights_init (``apply`` visits
# every submodule).
net = LeNet().to(device)
net.apply(weights_init)
# The loss function (``criterion``) is bound further down, next to the attack
# setup; no nn.CrossEntropyLoss instance is needed here because it would be
# replaced before ever being called.
import torch.nn.functional as F
from torchvision import transforms
import matplotlib.pyplot as plt
import os
def cross_entropy_for_onehot(pred, target):
    """Cross-entropy between logits and soft (probability-vector) labels.

    Args:
        pred: Logits; log-softmax is taken over the last dimension.
        target: Per-class weights multiplied element-wise with the
            log-probabilities.

    Returns:
        Scalar tensor: ``-target * log_softmax(pred)`` summed over
        dimension 1 and then averaged over the remaining entries.
    """
    log_probs = F.log_softmax(pred, dim=-1)
    per_sample = (-target * log_probs).sum(dim=1)
    return per_sample.mean()
# Converts an image tensor into a PIL image for the preview plots.
tt = transforms.ToPILImage()

# Leaked gradients that the attack tries to reproduce. They are compared
# pair-wise with the gradients of net.parameters(), so they are presumably
# stored in that same order - confirm against the challenge files.
# SECURITY: torch.load unpickles the file, which can execute arbitrary code.
# Only load a file you trust, or inspect it in a sandbox first.
# map_location keeps the tensors on `device` even if they were saved elsewhere.
original_dy_dx = torch.load('forbidden_spell.pt', map_location=device)
original_dy_dx = tuple(original_dy_dx)

# Random starting guesses for the hidden input image and its label logits.
# Both are optimised directly, hence requires_grad. The image is drawn before
# the label so the seeded random stream is consumed in a fixed order.
dummy_data = torch.randn((1, 1, 32, 32)).to(device).requires_grad_(True)
dummy_label = torch.randn((1, 100)).to(device).requires_grad_(True)

# L-BFGS updates the dummy tensors only; the network weights stay fixed.
optimizer = torch.optim.LBFGS([dummy_data, dummy_label])
# Loss that accepts a probability vector as the target instead of a class index.
criterion = cross_entropy_for_onehot
# Output directory for the periodic snapshots; created once, up front.
os.makedirs('save', exist_ok=True)


def closure():
    """Evaluate the gradient-matching objective for the current dummy tensors.

    Runs the dummy image through the network, computes the loss against the
    softmax of the dummy label, takes the gradient of that loss with respect
    to the network parameters, and returns the summed squared difference
    between those gradients and the leaked ones. Gradients of that difference
    are back-propagated so the optimizer can update the dummy tensors.

    Returns:
        Scalar tensor with the gradient-matching loss.
    """
    optimizer.zero_grad()
    dummy_pred = net(dummy_data)
    # The label is optimised as free logits; softmax turns it into a
    # probability vector for the loss.
    dummy_onehot_label = F.softmax(dummy_label, dim=-1)
    dummy_loss = criterion(dummy_pred, dummy_onehot_label)
    # create_graph=True keeps these gradients differentiable, so the
    # matching loss below can itself be back-propagated to the dummy tensors.
    dummy_dy_dx = torch.autograd.grad(dummy_loss, net.parameters(), create_graph=True)
    grad_diff = 0
    for gx, gy in zip(dummy_dy_dx, original_dy_dx):
        grad_diff += ((gx - gy) ** 2).sum()
    grad_diff.backward()
    return grad_diff


for iters in range(100):
    # L-BFGS may call the closure several times within a single step.
    optimizer.step(closure)
    if iters % 10 == 0:
        # Re-evaluate so the printed value reflects the state after the step.
        current_loss = closure()
        print(iters, "%.4f" % current_loss.item())
        # Detached copy: the snapshot should not carry autograd state.
        snapshot = dummy_data[0].detach().cpu()
        plt.imshow(tt(snapshot))
        plt.savefig(f'save/{iters}.png')
        # Close the figure so images do not pile up on the same axes.
        plt.close()
        torch.save(snapshot, f'save/{iters}.pt')
From roughly the 30th iteration onward, the reconstructed image (save/30.png and later snapshots) is clear enough for the QR code in it to be decoded.
You can decode it with any QR scanner app, or install and use qr-scanner-cli:
sudo npm i -g qr-scanner-cli
qrscanner save/30.png
Summary
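Death’s Glance: the provided forbidden_spell.pt contains leaked model gradients. Rebuilding the same seeded LeNet and running a Deep Leakage from Gradients (DLG) attack, which optimizes a random dummy input with L-BFGS until its gradients match the leaked ones, reconstructs the original input image: a QR code that can then be scanned to recover the hidden message.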