Hack to get mamba running, but I'm gonna need a new loader
jloveric committed Dec 29, 2023
1 parent 00a3cbe commit 0e6c86c
Showing 2 changed files with 4 additions and 2 deletions.
language_interpolation/networks.py (1 change: 1 addition & 0 deletions)
@@ -121,6 +121,7 @@ class ClassificationMixin:
     def eval_step(self, batch: Tensor, name: str):
         x, y, idx = batch
         y_hat = self(x)
+        print('y_hat.shape',y_hat.shape, 'y shape', y.shape)
         loss = self.loss(y_hat, y.flatten())
 
         diff = torch.argmax(y_hat, dim=1) - y.flatten()
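The added print most likely checks the shape contract of the loss call: a classification loss such as torch.nn.CrossEntropyLoss expects logits of shape (N, C) against targets of shape (N,). A minimal sketch of that contract, assuming hypothetical sizes and that self.loss is cross-entropy (neither is confirmed by the diff):

import torch

batch, num_classes = 4, 128                    # hypothetical sizes
y_hat = torch.randn(batch, num_classes)        # (N, C) logits, as the loss expects
y = torch.randint(0, num_classes, (batch, 1))  # targets arriving as (batch, 1)

loss_fn = torch.nn.CrossEntropyLoss()
loss = loss_fn(y_hat, y.flatten())             # flatten() gives the required (N,) targets
print(loss.item())

If y_hat instead came out as (batch, seq, vocab), this call would fail, which is consistent with the last-token reduction added in state_space_network.py below.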
language_interpolation/state_space_network.py (5 changes: 3 additions & 2 deletions)
@@ -85,7 +85,7 @@ def forward(self, input_ids):
         class MambaLMHeadModel, https://github.com/state-spaces/mamba/blob/main/mamba_ssm/models/mixer_seq_simple.py#L173
         """
-        print('input_ids', input_ids)
+        print('input_ids.shape', input_ids.shape)
         reshaped = input_ids.reshape(input_ids.shape[0], input_ids.shape[1]*input_ids.shape[2])
         x = self.embedding(reshaped)
         print('x.shape after', x.shape)
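For context, the reshape above flattens a 3-D batch of token ids into one long sequence per batch row before the embedding lookup. A minimal sketch of the shape flow, with hypothetical sizes (vocab, d_model, and all dimension sizes are assumptions, not values from the repository):

import torch
import torch.nn as nn

batch, seq_len, features = 2, 10, 3    # hypothetical sizes
vocab, d_model = 256, 64               # hypothetical vocab size / embedding width

input_ids = torch.randint(0, vocab, (batch, seq_len, features))
reshaped = input_ids.reshape(input_ids.shape[0], input_ids.shape[1] * input_ids.shape[2])
embedding = nn.Embedding(vocab, d_model)

x = embedding(reshaped)
print(x.shape)  # torch.Size([2, 30, 64]), i.e. (batch, seq_len * features, d_model)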
@@ -94,8 +94,9 @@ class MambaLMHeadModel, https://github.com/state-spaces/mamba/blob/main/mamba_ssm/models/mixer_seq_simple.py#L173
 
         x = self.norm_f(x)
         logits = self.lm_head(x)
+        reduced = logits[:,-1,:].reshape(logits.shape[0], logits.shape[2])
 
-        return logits
+        return reduced #logits
 
 
 class ResidualBlock(nn.Module):
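The new return value keeps only the logits at the final sequence position, turning the (batch, seq, vocab) output into the (batch, vocab) shape that eval_step's loss expects; presumably this is the "hack" the commit message says will need a new loader. A minimal sketch of the reduction, assuming hypothetical sizes:

import torch

batch, seq_len, vocab = 2, 30, 256     # hypothetical sizes
logits = torch.randn(batch, seq_len, vocab)

reduced = logits[:,-1,:].reshape(logits.shape[0], logits.shape[2])
# logits[:, -1, :] is already (batch, vocab), so the reshape is a no-op,
# kept here only to mirror the committed code.
assert reduced.shape == (batch, vocab)

Since the -1 indexing already drops the sequence dimension, logits[:, -1, :] alone would produce the same tensor.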
