Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions kaggle_submission.py
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

def create_submission(predictions, output_path, num_structures=5):
    """
    Create a submission CSV from predicted coordinates.

    One row is written per residue, across all sequences in order. Each row
    holds a running residue index ('id'), the residue character ('resname'),
    its 1-based position within its sequence ('resid'), and
    'x_k', 'y_k', 'z_k' for k = 1..num_structures.

    Args:
        predictions: dict with 'coords', indexed as
            coords[seq_idx, pos, struct_idx, :] and yielding three values
            (documented shape: (N, L, num_structures, 3)), and 'sequences',
            an iterable of sequences.
        output_path: path to save submission.csv
        num_structures: number of structures to emit per residue (default 5)

    Returns:
        The pandas DataFrame that was written to output_path.

    Note:
        Every sequence must fit inside the length axis of 'coords', and
        'coords' must hold at least num_structures structures; otherwise the
        coordinate lookup fails (presumably with IndexError when 'coords' is
        a NumPy array -- confirm against the caller).
    """
    coords = predictions['coords']  # Expected shape: (num_seq, max_len, num_struct, 3)
    sequences = predictions['sequences']

    # Fix the column layout up front so that an empty prediction set still
    # produces a CSV with the complete header instead of a header-less file.
    columns = ['id', 'resname', 'resid']
    for struct_idx in range(num_structures):
        columns.extend([
            f'x_{struct_idx + 1}',
            f'y_{struct_idx + 1}',
            f'z_{struct_idx + 1}',
        ])

    rows = []
    for seq_idx, seq in enumerate(sequences):
        for pos, nucleotide in enumerate(seq):
            row = {
                # Running index over all residues of all sequences.
                'id': len(rows),
                'resname': nucleotide,
                'resid': pos + 1,
            }

            # Add coordinates for each structure
            for struct_idx in range(num_structures):
                x, y, z = coords[seq_idx, pos, struct_idx, :]

                row[f'x_{struct_idx + 1}'] = x
                row[f'y_{struct_idx + 1}'] = y
                row[f'z_{struct_idx + 1}'] = z

            rows.append(row)

    df = pd.DataFrame(rows, columns=columns)
    df.to_csv(output_path, index=False)
    print(f"Saved submission to {output_path}")
    return df

Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,9 @@ def predict_structures(

# Store predictions
for j, (idx, row) in enumerate(batch_df.iterrows()):
seq_len = len(row['sequence'])
seq_len = min(len(row['sequence']), max_length)
target_id = row['target_id']
sequence = row['sequence']
sequence = row['sequence'][:max_length] # Truncate sequence to max_length
Comment on lines +153 to +155
Copy link

Copilot AI Jan 31, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change fixes the IndexError for sequences longer than max_length, but there is no automated test covering the case where len(row['sequence']) > max_length for the predict_structures → create_submission pipeline. Adding a regression test that constructs a sequence longer than max_length and asserts that submission generation completes without index errors (and that only the first max_length residues are emitted) would help prevent this bug from reappearing.

Copilot uses AI. Check for mistakes.
coords = pred_coords[j, :seq_len, :, :]

predictions.append({
Expand Down
Loading