diff --git a/Dockerfile b/Dockerfile index 58061d0..e084a47 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,4 +8,4 @@ RUN python -m pip install --no-cache-dir --no-cache /opt/PartiNet LABEL AUTHORS Mihin Perera, Edward Yang, Julie Iskander LABEL MAINTAINERS Mihin Perera, Edward Yang, Julie Iskander -LABEL VERSION v0.2.0 +LABEL VERSION v1.0.1 diff --git a/README.md b/README.md index 9bf15d5..b5f83ee 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,6 @@ -# PartiNet 🔬 +# PartiNet + +![PartiNet RMSD](docs/static/img/partinet_RMSD.png) PartiNet is a three-stage pipeline for automated particle picking in cryo-EM micrographs, combining advanced denoising with state-of-the-art deep learning detection. @@ -9,12 +11,12 @@ Use our pretrained model at [Model Weights](https://huggingface.co/MihinP/PartiN ## Features -- 🧹 Heuristic denoising for improved signal-to-noise ratio -- 🎯 Dynamic deep learning particle detection -- ⚡ Multi-GPU support for faster processing -- 🔄 Seamless integration with cryoSPARC and RELION workflows -- 📊 Confidence-based particle filtering -- 🖼️ Visual detection validation +- Heuristic denoising for improved signal-to-noise ratio +- Dynamic deep learning particle detection +- Multi-GPU support for faster processing +- Seamless integration with cryoSPARC and RELION workflows +- Confidence-based particle filtering +- Visual detection validation ## Prerequisites @@ -49,12 +51,12 @@ docker run --gpus all -v /path/to/your/data:/data \ ``` project_directory/ -├── motion_corrected/ # 📁 Input micrographs -├── denoised/ # 🧹 Denoised outputs -├── exp/ # 🎯 Detection results -│ ├── labels/ # 📋 Coordinates -│ └── ... # 🖼️ Visualizations -└── partinet_particles.star # ⭐ Final output +├── motion_corrected/ # Input micrographs +├── denoised/ # Denoised outputs +├── exp/ # Detection results +│ ├── labels/ # Coordinates +│ └── ... 
# Visualizations +└── partinet_particles.star # Final output ``` ## Pipeline Stages @@ -94,7 +96,8 @@ partinet star \ - `*.png`: Visualization overlays 3. **STAR File** (`partinet_particles.star`) - - Ready for RELION processing + - Ready for CryoSPARC processing + - Additional commands available for RELION compatible output ## Advanced Usage @@ -145,3 +148,5 @@ Citation information will be added upon publication For issues and questions: - Open an [Issue](https://github.com/WEHI-ResearchComputing/PartiNet/issues) - Check existing [Discussions](https://github.com/WEHI-ResearchComputing/PartiNet/discussions) + + diff --git a/Singularity b/Singularity index 472726e..a6ffbb6 100644 --- a/Singularity +++ b/Singularity @@ -18,4 +18,4 @@ from: python:3.9.19-slim-bookworm %labels AUTHORS Mihin Perera, Edward Yang, Julie Iskander MAINTAINERS Mihin Perera, Edward Yang, Julie Iskander - VERSION v1.0.0 \ No newline at end of file + VERSION v1.0.1 \ No newline at end of file diff --git a/docs/docs/getting-started.md b/docs/docs/getting-started.md index 4b90a31..9ec1cbf 100644 --- a/docs/docs/getting-started.md +++ b/docs/docs/getting-started.md @@ -20,23 +20,23 @@ PartiNet expects and creates the following directory structure: ``` project_directory/ -├── motion_corrected/ # 📁 Your input micrographs +├── motion_corrected/ # Your soft-linked input micrographs │ ├── micrograph1.mrc │ ├── micrograph2.mrc │ └── ... -├── denoised/ # 🧹 Created by denoise stage +├── denoised/ # Created by denoise stage │ ├── micrograph1.mrc │ ├── micrograph2.mrc │ └── ... -├── exp/ # 🎯 Created by detect stage -│ ├── labels/ # 📋 Detection coordinates +├── exp/ # Created by detect stage +│ ├── labels/ # Detection coordinates (YOLO format) │ │ ├── micrograph1.txt │ │ ├── micrograph2.txt │ │ └── ... 
-│ ├── micrograph1.png # 🖼️ Micrographs with detections drawn +│ ├── micrograph1.png # Micrographs with detections drawn │ ├── micrograph2. │ └── ... -└── partinet_particles.star # ⭐ Final STAR file (created by star stage) +└── partinet_particles.star # CryoSPARC-style STAR file (created by star stage) ``` **Pipeline Flow:** diff --git a/docs/docs/intro.md b/docs/docs/intro.md index 3b5f4b6..bf4c536 100644 --- a/docs/docs/intro.md +++ b/docs/docs/intro.md @@ -32,7 +32,7 @@ The final stage prepares particle data for further processing and provides repor PartiNet is ideal for: - Identifying rare views - Picking on heterogeneous datasets -- Reporting on particle populations +- High speed picking ## Next Steps @@ -40,9 +40,6 @@ PartiNet is ideal for: - **Ready to begin?** Follow our [Getting Started](getting-started.md) guide for your first analysis - **Need specific details?** Check the individual stage documentation: [Denoise](stages/denoise.md), [Detect](stages/detect.md), [Star](stages/star.md) - \ No newline at end of file +If you encounter issues or need assistance please raise an issue on the GitHub diff --git a/docs/docs/stages/denoise.md b/docs/docs/stages/denoise.md index e10d9f4..9114117 100644 --- a/docs/docs/stages/denoise.md +++ b/docs/docs/stages/denoise.md @@ -69,16 +69,16 @@ mkdir motion_corrected **From CryoSPARC:** ```shell # Using symbolic links (faster, saves space) -ln -s /path/to/cryosparc/project/job_number/motioncorrected/*_fractions_patch_aligned.mrc motion_corrected/ +ln -s /path/to/cryosparc/project/JXXX/motioncorrected/*_fractions_patch_aligned.mrc motion_corrected/ # Using rsync (copies files) -rsync /path/to/cryosparc/project/job_number/motioncorrected/*_fractions_patch_aligned.mrc motion_corrected/ +rsync /path/to/cryosparc/project/JXXX/motioncorrected/*_fractions_patch_aligned.mrc motion_corrected/ ``` **From RELION:** ```shell # Link motion-corrected micrographs -ln -s 
/path/to/relion/project/MotionCorr/job_number/Micrographs/*.mrc motion_corrected/ +ln -s /path/to/relion/project/MotionCorr/jobXXX/*.mrc motion_corrected/ ``` ### 3. Run Denoising @@ -166,7 +166,7 @@ partinet denoise \ ### Different Output Formats -By default PartiNet outputs denoised images in `png` format. This is necessary for compatibility with the detection architecture. `png` is a lossless compression, however micrographs are normalised from 32 bit depth `mrc` files to 8 bit `png`. `jpg` is also available (eg for making figures) but is not recommended for use due to lossy compression. +By default PartiNet outputs denoised images in `png` format. This is necessary for compatibility with the detection architecture. `png` is a lossless compression, however micrographs are normalised from 32 bit depth `mrc` files to 8 bit `png`. `jpg` is also available (as a legacy format) but is not recommended for use due to lossy compression. ```shell # JPEG format (smaller file size, lossy compression) diff --git a/docs/docs/stages/star.md b/docs/docs/stages/star.md index d1d3f67..5f0aad6 100644 --- a/docs/docs/stages/star.md +++ b/docs/docs/stages/star.md @@ -81,12 +81,32 @@ Choose your confidence threshold based on: - Downstream processing requirements - Balance between particle quantity and quality -## Output +## CryoSPARC Output -The command generates a STAR file containing: +The command generates a STAR file compatible with CryoSPARC containing: - Particle coordinates (X, Y positions) - Corresponding micrograph paths +## RELION output + +Use `--relion` and `--relion-project-dir` to generate RELION-compatible STAR outputs under `/partinet`: + +- Micrograph manifest: `/partinet/pick.star` +- Per-micrograph coordinates: `/partinet/movies/.star` + +Example: + +```bash +partinet star \ + --labels /data/partinet_picking/exp/labels \ + --images /data/partinet_picking/denoised \ + --output /data/partinet_picking/output.star \ + --conf 0.2 \ + --relion \ + --relion-project-dir 
/data/relion/EMPIAR-10089 \ + --mrc-prefix MotionCorr/job003/movies +``` + ## Next Steps @@ -96,8 +116,10 @@ After generating your STAR file, you can: :::warning -PartiNet Denoise flips micrographs in the y-axis due to matrix transpose operations. If you denoised micrographs in PartiNet ensure that when you import and extract particle coordinates that you toggle `Flip in y` in CryoSPARC or similar in RELION prior to particle extraction. +If you are using PartiNet v1.0.0, the Denoise flips micrographs in the y-axis due to matrix transpose operations. If you denoised micrographs in PartiNet ensure that when you import and extract particle coordinates that you toggle `Flip in y` in CryoSPARC prior to particle extraction. This has been fixed in v1.0.1+. You can check your version with `partinet --help` ::: + + diff --git a/docs/src/components/HomepageFeatures/index.tsx b/docs/src/components/HomepageFeatures/index.tsx index 330d966..7210aba 100644 --- a/docs/src/components/HomepageFeatures/index.tsx +++ b/docs/src/components/HomepageFeatures/index.tsx @@ -24,7 +24,7 @@ const FeatureList: FeatureItem[] = [ Svg: require('@site/static/img/undraw_success_288d.svg').default, description: ( <> - PartiNet proides improved identification of rare particle views allowing for comprehensive map reconstruction. + PartiNet provides improved identification of rare particle views allowing for comprehensive map reconstruction. ), }, @@ -33,7 +33,7 @@ const FeatureList: FeatureItem[] = [ Svg: require('@site/static/img/undraw_space-exploration_dhu1.svg').default, description: ( <> - PartiNet provides up to 10× faster inference than existing tools enabling real-time, on-the-fly picking. + PartiNet provides up to 7× faster inference than existing tools enabling real-time, on-the-fly picking. 
), }, diff --git a/docs/src/pages/index.tsx b/docs/src/pages/index.tsx index 32fbfe4..e196b77 100644 --- a/docs/src/pages/index.tsx +++ b/docs/src/pages/index.tsx @@ -5,6 +5,7 @@ import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; import Layout from '@theme/Layout'; import HomepageFeatures from '@site/src/components/HomepageFeatures'; import Heading from '@theme/Heading'; +import partinetRMSD from '@site/static/img/partinet_RMSD.png'; import styles from './index.module.css'; @@ -17,6 +18,11 @@ function HomepageHeader() { {siteConfig.title}

{siteConfig.tagline}

+ PartiNet RMSD
+ title={`PartiNet: high performance particle picking for cryo-EM`} + description="PartiNet is a high performance particle picking pipeline for cryo-electron microscopy (cryo-EM) that provides improved identification of rare particle views and up to 7× faster inference than existing tools.">
diff --git a/docs/static/img/partinet_RMSD.png b/docs/static/img/partinet_RMSD.png new file mode 100644 index 0000000..f39fc81 Binary files /dev/null and b/docs/static/img/partinet_RMSD.png differ diff --git a/partinet/__init__.py b/partinet/__init__.py index a5aed91..7d78337 100644 --- a/partinet/__init__.py +++ b/partinet/__init__.py @@ -1,7 +1,7 @@ import click import sys, os -__version__ = "1.0.0" +__version__ = "1.0.1" DYNAMICDET_AVAILABLE_MODELS = ["yolov7", "yolov7x", "yolov7-w6", "yolov7-e6", "yolov7-d6", "yolov7-e6e"] @@ -77,12 +77,23 @@ def split(star, images, output, class_id, test_size, split_only): @main.command() @click.option("--labels", type=str, required=True, help="Path to the labels directory") @click.option("--images", type=str, required=True, help="Path to the images directory") -@click.option("--output", type=str, required=True, help="Path to the output STAR file") +@click.option("--output", type=str, required=True, help="Path to the output STAR file (CryoSPARC style)") @click.option("--conf", type=float, default=0.1, help="Minimum confidence threshold from predictions") -def star(labels, images, output,conf): +@click.option("--relion", is_flag=True, default=False, help="Also generate RELION pick.star + per-micrograph star files") +@click.option("--relion-project-dir", type=str, default=None, help="RELION project root (creates /partinet/pick.star and /partinet/movies/*.star)") +@click.option("--mrc-prefix", type=str, default="", help="Prefix path for micrograph names in RELION pick.star (e.g. 
MotionCorr/job003/movies)") +def star(labels, images, output, conf, relion, relion_project_dir, mrc_prefix): click.echo("Generating STAR file...") import partinet.process_utils.star_file - partinet.process_utils.star_file.main(labels,images,output,conf) + partinet.process_utils.star_file.main( + labels, + images, + output, + conf, + relion=relion, + relion_project_dir=relion_project_dir, + mrc_prefix=mrc_prefix, + ) @main.command() @click.option("--source", type=str, required=True, help="Path to Raw micrographs") diff --git a/partinet/process_utils/guided_denoiser.py b/partinet/process_utils/guided_denoiser.py index 3faae6a..0eaba39 100644 --- a/partinet/process_utils/guided_denoiser.py +++ b/partinet/process_utils/guided_denoiser.py @@ -167,8 +167,6 @@ def denoise(image_path: str) -> np.ndarray: # described in https://github.com/WEHI-ResearchComputing/PartiNet/issues/41 image = image.astype(np.float32) - image = image.T - image = np.rot90(image) normalized_image = standard_scaler(np.array(image)) contrast_enhanced_image = contrast_enhancement(normalized_image) weiner_filtered_image = wiener_filter(contrast_enhanced_image, kernel, K=30) diff --git a/partinet/process_utils/star_file.py b/partinet/process_utils/star_file.py index 4c7fc7a..008c58a 100644 --- a/partinet/process_utils/star_file.py +++ b/partinet/process_utils/star_file.py @@ -4,22 +4,19 @@ import csv import cv2 import argparse -from typing import List, Dict, Tuple +from typing import List, Dict, Tuple, Optional from multiprocessing import Pool, cpu_count def yolo_to_starfile(yolo_coords: Dict[str, float], image_width: int, image_height: int, diameters: List[int]) -> Tuple[int, int, int]: - x_center = math.ceil(yolo_coords['x_centre'] * image_width) - y_center = math.ceil(yolo_coords['y_centre'] * image_height) - width = yolo_coords['width'] * image_width - height = yolo_coords['height'] * image_height + x_center = math.ceil(yolo_coords["x_centre"] * image_width) + y_center = 
math.ceil(yolo_coords["y_centre"] * image_height) + width = yolo_coords["width"] * image_width + height = yolo_coords["height"] * image_height diameter = math.ceil(max(width, height)) diameters.append(diameter) return x_center, y_center, diameter def generate_output(labels: pd.DataFrame, filename: str, img_width: int, img_height: int) -> List[Tuple[str, int, int, int]]: - """ - Generate rows for the STAR file for a single image - """ diameters: List[int] = [] output_rows = [] for _, row in labels.iterrows(): @@ -29,46 +26,30 @@ def generate_output(labels: pd.DataFrame, filename: str, img_width: int, img_hei return output_rows def process_image(args_tuple) -> List[Tuple[str, int, int, int]]: - """ - Process a single image and return STAR rows - """ image_file, labels_path, images_path, conf_thresh = args_tuple filename = os.path.splitext(image_file)[0] label_file_path = os.path.join(labels_path, f"{filename}.txt") if not os.path.exists(label_file_path): - print(f"Warning: Label file not found for image {image_file}. Skipping.") + # skip missing labels return [] - # Read the image to get dimensions image = cv2.imread(os.path.join(images_path, image_file)) if image is None: - print(f"Warning: Could not read image {image_file}. 
Skipping.") return [] img_width, img_height = image.shape[1], image.shape[0] - # Read YOLO labels - custom_headers = ['class', 'x_centre', 'y_centre', 'width', 'height', 'conf'] - labels = pd.read_csv(label_file_path, header=None, names=custom_headers, sep=' ') - labels = labels[labels['conf'] > float(conf_thresh)] + custom_headers = ["class", "x_centre", "y_centre", "width", "height", "conf"] + labels = pd.read_csv(label_file_path, header=None, names=custom_headers, sep=r"\s+") + labels = labels[labels["conf"] > float(conf_thresh)] + if labels.empty: + return [] - # Generate STAR rows return generate_output(labels, filename, img_width, img_height) -def main(labels_path: str, images_path: str, star_out_path: str, conf_thresh: float) -> None: - image_files = os.listdir(images_path) - args_list = [(img_file, labels_path, images_path, conf_thresh) for img_file in image_files] - - # Use all available CPUs - with Pool(cpu_count()) as pool: - results = pool.map(process_image, args_list) - - # Flatten the results - all_rows = [row for result in results for row in result] - - # Write STAR file - with open(star_out_path, "w") as star_file: - star_writer = csv.writer(star_file, delimiter=' ') +def write_cryosparc_star(all_rows: List[Tuple[str, int, int, int]], star_out_path: str) -> None: + with open(star_out_path, "w", newline="") as star_file: + star_writer = csv.writer(star_file, delimiter=" ", lineterminator="\n") star_writer.writerow([]) star_writer.writerow(["data_"]) star_writer.writerow([]) @@ -79,14 +60,89 @@ def main(labels_path: str, images_path: str, star_out_path: str, conf_thresh: fl star_writer.writerow(["_rlnDiameter", "#4"]) star_writer.writerows(all_rows) +def write_relion_coordinate_star(path: str, coords: List[Tuple[float, float]]) -> None: + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w", newline="\n") as f: + f.write("data_\n\n") + f.write("loop_\n") + f.write("_rlnCoordinateX #1\n") + f.write("_rlnCoordinateY #2\n") + for x, y 
in coords: + f.write(f"{x:.6f} {y:.6f}\n") + +def write_relion_pick_star(rows: List[Tuple[str, str]], out_path: str) -> None: + with open(out_path, "w", newline="\n") as f: + f.write("data_coordinate_files\n\n") + f.write("loop_\n") + f.write("_rlnMicrographName #1\n") + f.write("_rlnMicrographCoordinates #2\n") + for mic, coord in rows: + f.write(f"{mic} {coord}\n") + +def relion_write(all_rows: List[Tuple[str, int, int, int]], pick_out: str, coords_dir: str, mrc_prefix: str, extension: str = ".star") -> None: + grouped: Dict[str, List[Tuple[float, float]]] = {} + for mic, x, y, d in all_rows: + grouped.setdefault(mic, []).append((float(x), float(y))) + + partinet_root = os.path.basename(os.path.dirname(pick_out)) # expects pick_out .../partinet/pick.star + mapping: List[Tuple[str, str]] = [] + for mic, coords in grouped.items(): + root = os.path.splitext(os.path.basename(mic))[0] + coord_star_name = f"{root}{extension}" + coord_star_path = os.path.join(coords_dir, coord_star_name) + write_relion_coordinate_star(coord_star_path, coords) + + mic_name = os.path.join(mrc_prefix, os.path.basename(mic)) if mrc_prefix else mic + coord_entry = os.path.join(partinet_root, "movies", coord_star_name) + mapping.append((mic_name, coord_entry)) + + write_relion_pick_star(mapping, pick_out) + +def main(labels_path: str, images_path: str, star_out_path: str, conf_thresh: float, relion: bool = False, relion_project_dir: Optional[str] = None, relion_pick: Optional[str] = None, relion_coord_dir: Optional[str] = None, mrc_prefix: str = "") -> None: + image_files = [f for f in os.listdir(images_path) if os.path.splitext(f)[1].lower() in [".mrc", ".tif", ".tiff", ".png", ".jpg", ".jpeg"]] + args_list = [(img_file, labels_path, images_path, conf_thresh) for img_file in image_files] + + with Pool(cpu_count()) as pool: + results = pool.map(process_image, args_list) + + all_rows = [row for result in results for row in result] + if not all_rows: + print("No particle rows produced.") + 
return + + write_cryosparc_star(all_rows, star_out_path) + print(f"Wrote cryosparc-compatible star to: {star_out_path}") + + if relion: + if relion_project_dir is None: + raise ValueError("For --relion, --relion-project-dir must be provided.") + relion_partinet = os.path.join(relion_project_dir, "partinet") + relion_pickstar = os.path.join(relion_partinet, "pick.star") + relion_coorddir = os.path.join(relion_partinet, "movies") + os.makedirs(relion_coorddir, exist_ok=True) + relion_write(all_rows, relion_pickstar, relion_coorddir, mrc_prefix) + print(f"Wrote relion pick.star: {relion_pickstar}") + print(f"Wrote relion per-micrograph stars under: {relion_coorddir}") + def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser(description="Generate STAR file from YOLO predictions") + parser = argparse.ArgumentParser(description="Generate STAR files from YOLO labels (CryoSPARC and optional RELION)") parser.add_argument("--labels", required=True, help="Path to the labels directory") parser.add_argument("--images", required=True, help="Path to the images directory") - parser.add_argument("--output", required=True, help="Path to the output STAR file") + parser.add_argument("--output", required=True, help="Path to the output STAR file (CryoSPARC style)") parser.add_argument("--conf", required=True, type=float, help="Minimum confidence threshold for predictions") + parser.add_argument("--relion", action="store_true", help="Also generate RELION pick.star + per-micrograph coordinate star files") + parser.add_argument("--relion-project-dir", default=None, help="RELION project root; outputs go to /partinet/pick.star and /partinet/movies/*.star") + parser.add_argument("--mrc-prefix", default="", help="Prefix for micrograph paths in RELION pick.star (e.g. 
MotionCorr/job003/movies)") return parser.parse_args() if __name__ == "__main__": args = parse_args() - main(args.labels, args.images, args.output, args.conf) \ No newline at end of file + main( + args.labels, + args.images, + args.output, + args.conf, + relion=args.relion, + relion_project_dir=args.relion_project_dir, + mrc_prefix=args.mrc_prefix, + ) \ No newline at end of file