"""Convert an SBOL document into a CSV dataset of sequences.

Usage:
    python scripts/sbol_to_dataset.py --input design.xml --output dataset.csv
"""

import argparse
import logging

import pandas as pd
from sbol2 import Document

logger = logging.getLogger(__name__)

# Column order of the generated CSV. Passing it to the DataFrame explicitly
# keeps the header row even when no sequence could be extracted.
_COLUMNS = ["sequence", "label"]


def _first_sequence_elements(doc, comp):
    """Return the elements of the first sequence referenced by *comp*.

    Returns None when the component references no sequences or when the
    lookup in ``doc.sequences`` yields a falsy result.
    """
    if not comp.sequences:
        return None
    sequence_obj = doc.sequences.get(comp.sequences[0])
    if not sequence_obj:
        return None
    return sequence_obj.elements


def sbol_to_csv(input_file, output_file="dataset.csv"):
    """Write the sequences found in an SBOL file to a CSV file.

    For every entry of ``doc.componentDefinitions`` the first referenced
    sequence is looked up and, if it has non-empty elements, written as one
    row with the columns ``sequence`` and ``label``. The label is always the
    constant string ``"unknown"``. Only the first sequence of a component is
    used; any further references are ignored.

    Args:
        input_file: Path of the SBOL file to read.
        output_file: Path of the CSV file to write (overwritten if present).

    Components whose sequence lookup raises are skipped with a logged
    warning instead of aborting the whole conversion. Errors from reading
    the input or writing the output propagate to the caller.
    """
    doc = Document()
    doc.read(input_file)

    rows = []
    for comp in doc.componentDefinitions:
        try:
            seq = _first_sequence_elements(doc, comp)
        except Exception as exc:
            # Best-effort conversion: one unresolvable reference must not
            # lose the rest of the dataset, but the skip should be visible.
            logger.warning(
                "Skipping component %s: %s",
                getattr(comp, "identity", comp),
                exc,
            )
            continue

        if seq:
            rows.append({"sequence": seq, "label": "unknown"})

    # Explicit columns: an empty result still produces a "sequence,label"
    # header instead of a blank file.
    pd.DataFrame(rows, columns=_COLUMNS).to_csv(output_file, index=False)

    print(f"Dataset saved to {output_file}")


def main(argv=None):
    """Parse command-line arguments and run the conversion.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Convert SBOL to CSV dataset")
    parser.add_argument("--input", required=True, help="Input SBOL file")
    parser.add_argument("--output", default="dataset.csv", help="Output CSV file")

    args = parser.parse_args(argv)

    sbol_to_csv(args.input, args.output)


if __name__ == "__main__":
    main()