diff --git a/demo.ipynb b/demo.ipynb new file mode 100644 index 0000000..38b6537 --- /dev/null +++ b/demo.ipynb @@ -0,0 +1,238 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "8dbbe089-484a-4539-acac-b4e324494f7c", + "metadata": {}, + "source": [ + "# Demo run" + ] + }, + { + "cell_type": "markdown", + "id": "844ab0fd-5e73-4629-b2fb-40ce50b755fe", + "metadata": {}, + "source": [ + "This notebook shows an example of how to use the FRIdata toolbox." + ] + }, + { + "cell_type": "markdown", + "id": "697ce575-1fb5-4f58-b881-bfd91220618c", + "metadata": {}, + "source": [ + "## Prerequisites" + ] + }, + { + "cell_type": "markdown", + "id": "91f9b031-94bf-4d42-83ce-d433c9677258", + "metadata": {}, + "source": [ + "- conda environment is set up (according to the README.md file)\n", + "- FRIdata CLI tool is created\n", + "- File with IDs" + ] + }, + { + "cell_type": "markdown", + "id": "4328af25-ac41-4061-be92-f4b195902bdc", + "metadata": {}, + "source": [ + "## 1. Create a dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9db9ff88-a90e-4e9a-a2c2-0ad243ad96f6", + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "\n", + "export PATH_TO_DATA=\"\"\n", + "export IDS_FILE_PATH=\"\"\n", + "export DASHBOARD_NAME=\"testset_inference_over\"\n", + "export AFDB_PATH=\"$PATH_TO_DATA/afdb/structures\"\n", + "\n", + "eval \"$(conda shell.bash hook)\"\n", + "conda activate tbe\n", + "\n", + "fridata generate_data \\\n", + " -t dataset \\\n", + " -d AFDB \\\n", + " -c subset \\\n", + " --overwrite \\\n", + " --version ${DASHBOARD_NAME} \\\n", + " -i ${IDS_FILE_PATH} \\\n", + " --input-path ${AFDB_PATH} \\\n", + " -e \"esm2_t33_650M_UR50D\"" + ] + }, + { + "cell_type": "markdown", + "id": "f867009b-d9d5-4038-8669-6bc9bf1c40b5", + "metadata": {}, + "source": [ + "## 2. 
Create a dashboard" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4203f2e3-3f3d-4a80-8cb3-4ed3b6b9c9ba", + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "\n", + "# Path to dataset.json generated in step 1\n", + "export PATH_TO_DATA=\"\"\n", + "export PATH_TO_DATASET_JSON=\"$PATH_TO_DATA/datasets/AFDB-subset--testset_inference_over/dataset.json\"\n", + "\n", + "eval \"$(conda shell.bash hook)\"\n", + "conda activate tbe\n", + "\n", + "fridata create_dashboard \\\n", + " --dataset ${PATH_TO_DATASET_JSON}" + ] + }, + { + "cell_type": "markdown", + "id": "2b91b83c-3ff5-41cd-8c4d-2971baadd2f0", + "metadata": {}, + "source": [ + "The generated dashboard is saved to \"./reports/AFDB-subset--testset_inference_over.html\"." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "50a15ebb-9f64-4846-9587-0b6984727891", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from IPython.display import IFrame\n", + "\n", + "IFrame(src='reports/AFDB-subset--testset_inference_over.html', width=700, height=600)" + ] + }, + { + "cell_type": "markdown", + "id": "76ae7b67-5565-41c2-97c3-91f9a63ba542", + "metadata": {}, + "source": [ + "## 3. 
Loading dataset for more details" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea69cf70-2c29-47d4-a6d8-5153c7a9b1b3", + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "\n", + "export PATH_TO_DATA=\"\"\n", + "\n", + "eval \"$(conda shell.bash hook)\"\n", + "conda activate tbe\n", + "\n", + "fridata load --file-path \"$PATH_TO_DATA/datasets/AFDB-subset--testset_inference_over\"" + ] + }, + { + "cell_type": "markdown", + "id": "15688d7f-e57f-4305-b8ee-5b9db79fefe3", + "metadata": {}, + "source": [ + "Results:\n", + "\n", + "```\n", + "db_type= collection_type=\n", + "proteome=''\n", + "version='testset_inference_over'\n", + "ids_file=PosixPath('') \n", + "seqres_file=None \n", + "archive_path=None \n", + "overwrite=True \n", + "batch_size=1000 \n", + "binary_data_download=False \n", + "is_hpc_cluster=False \n", + "input_path=PosixPath('') \n", + "embedder_type= \n", + "embedding_size=1280 \n", + "created_at='1778013094005832' \n", + "config=Config(debug_mode='warning', data_path='', disto_type='CA', disto_thr='inf', separator='-', batch_size=1000)\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5fe4827-1f06-4a27-874b-b26b158ad784", + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "\n", + "export PATH_TO_DATA=\"\"\n", + "export PATH_TO_STRUCTURES=\"$PATH_TO_DATA/structures/AFDB/subset_/testset_inference_over/0/pdbs.h5\"\n", + "export PATH_TO_EMBEDDINGS=\"$PATH_TO_DATA/embeddings/AFDB-subset--_test_dask/batch_0.h5\"\n", + "export PATH_TO_DISTOGRAMS=\"$PATH_TO_DATA/distograms/AFDB-subset--_test_dask/batch_0.h5\"\n", + "export PATH_TO_COORDINATES=\"$PATH_TO_DATA/coordinates/AFDB-subset--_test_dask/batch_0_ca.h5\"\n", + "\n", + "eval \"$(conda shell.bash hook)\"\n", + "conda activate tbe\n", + "\n", + "fridata inspect_h5 --help" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + 
"language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/reports/AFDB-subset--testset_inference_over.html b/reports/AFDB-subset--testset_inference_over.html new file mode 100644 index 0000000..abe5670 --- /dev/null +++ b/reports/AFDB-subset--testset_inference_over.html @@ -0,0 +1,124 @@ + + + + + + AFDB-subset--testset_inference_over + + + + + +
+ +