-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathinference.rs
More file actions
48 lines (41 loc) · 1.47 KB
/
inference.rs
File metadata and controls
48 lines (41 loc) · 1.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
use std::time::Duration;
use crate::core::offline::{LocalModel, ReadinessCheck};
/// Prints a human-readable summary of the model recommended for code review:
/// its name, estimated RAM footprint, optionally the detected context window,
/// and a READY / NOT READY status with any readiness warnings.
pub(in super::super) fn print_recommended_model_summary(
    recommended: &LocalModel,
    estimated_ram_mb: usize,
    detected_context_window: Option<usize>,
    readiness: &ReadinessCheck,
) {
    println!("\nRecommended for code review: {}", recommended.name);
    println!(" Estimated RAM: ~{estimated_ram_mb}MB");

    // Context window line is only emitted when detection succeeded.
    match detected_context_window {
        Some(ctx_size) => {
            println!(" Context window: {ctx_size} tokens (detected from model)");
        }
        None => {}
    }

    if readiness.ready {
        println!("\nStatus: READY");
        return;
    }

    // Not ready: surface every warning the readiness check collected.
    println!("\nStatus: NOT READY");
    for warning in &readiness.warnings {
        println!(" Warning: {warning}");
    }
}
/// Reports a successful inference probe: elapsed wall-clock time and
/// measured throughput, plus a warning when throughput is very low.
pub(in super::super) fn print_inference_success(elapsed: Duration, tokens_per_sec: f64) {
    let secs = elapsed.as_secs_f64();
    println!("OK ({secs:.1}s, ~{tokens_per_sec:.0} tok/s)");

    // Below 2 tok/s a review will feel sluggish; suggest a lighter model.
    // (Comparison kept as `< 2.0` so a NaN rate does not trigger the warning.)
    if tokens_per_sec < 2.0 {
        println!(" Warning: Very slow inference. Consider a smaller/quantized model.");
    }
}
/// Reports a failed inference probe, showing the error and a hint that the
/// failure may be transient (model still loading).
pub(in super::super) fn print_inference_failure(error: &impl std::fmt::Display) {
    // One println! with embedded newlines — byte-identical stdout to three calls.
    println!("FAILED\n Error: {error}\n The model may still be loading. Try again in a moment.");
}
/// Prints an example invocation of the review command wired to the given
/// endpoint and model flag.
pub(in super::super) fn print_usage(base_url: &str, model_flag: &str) {
    // Single println! with an embedded newline — output is byte-identical
    // to printing the two lines separately.
    println!("\nUsage:\n git diff | diffscope review --base-url {base_url} --model {model_flag}");
}