-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodels.yaml
More file actions
106 lines (93 loc) · 3.49 KB
/
models.yaml
File metadata and controls
106 lines (93 loc) · 3.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
---
# Machine-readable index of local models (generated)
# fields: name, file, role, format, vram_estimate
#
# Conventions:
#   - Multi-shard models list each shard filename on its own line inside a
#     literal block scalar (`|-`, no trailing newline); consumers split on
#     newlines. NOTE(review): a YAML sequence would be more machine-friendly,
#     but that would change the `file` field's type for existing consumers.
#   - vram_estimate values beginning with '>' MUST be quoted: an unquoted
#     leading '>' starts a YAML folded block scalar and fails to parse.
models:
  - name: qwen2.5-7b-instruct
    file: qwen2.5-7b-instruct-q4_k_m.gguf
    role: small-instruct (schema extraction / validation)
    format: gguf Q4_K_M
    vram_estimate: 6-8GB
  - name: qwen3-7b-instruct
    file: qwen3-7b-instruct-q4_k_m.gguf
    role: small-instruct (alternative)
    format: gguf Q4_K_M
    vram_estimate: 6-8GB
  - name: dorna-llama3-8b-instruct
    file: dorna-llama3-8b-instruct.Q4_K_M.gguf
    role: instruct (heavy / on-demand)
    format: gguf Q4_K_M
    vram_estimate: 7-9GB (test on device)
  - name: Dorna2-Llama3.1-8B-Instruct
    file: |-
      Dorna2-Llama3.1-8B-Instruct-model-00001-of-00005.safetensors
      Dorna2-Llama3.1-8B-Instruct-model-00002-of-00005.safetensors
      Dorna2-Llama3.1-8B-Instruct-model-00003-of-00005.safetensors
      Dorna2-Llama3.1-8B-Instruct-model-00004-of-00005.safetensors
      Dorna2-Llama3.1-8B-Instruct-model-00005-of-00005.safetensors
    role: heavy-sharded (offline / finetune)
    format: safetensors (multi-shard)
    # Quoted: unquoted ">16GB ..." is a YAML syntax error (folded-scalar header).
    vram_estimate: ">16GB (not suitable for RTX2060S)"
  - name: aya-expanse-8b
    file: |-
      aya-expanse-8b-model-00001-of-00004.safetensors
      aya-expanse-8b-model-00002-of-00004.safetensors
      aya-expanse-8b-model-00003-of-00004.safetensors
      aya-expanse-8b-model-00004-of-00004.safetensors
    role: heavy-sharded (research / batch)
    format: safetensors (multi-shard)
    vram_estimate: ">16GB"
  - name: gemma-3-4b-persian
    file: |-
      gemma-3-4b-persian-v0-model-00001-of-00004.safetensors
      gemma-3-4b-persian-v0-model-00002-of-00004.safetensors
      gemma-3-4b-persian-v0-model-00003-of-00004_2.safetensors
      gemma-3-4b-persian-v0-model-00004-of-00004.safetensors
      gemma-3-4b-persian-v0-abliterated-q8_0.gguf
    role: persian-specialized (post-process, generation)
    format: safetensors / gguf
    vram_estimate: 4-6GB (quant recommended)
  # Was "PersiandMind-PersianMind-v1.0" — garbled/duplicated prefix; the file
  # on disk is PersianMind-v1.0.Q5_K_M.gguf, so the canonical name is used.
  - name: PersianMind-v1.0
    file: PersianMind-v1.0.Q5_K_M.gguf
    role: persian-specialized (normalization / generation)
    format: gguf Q5
    vram_estimate: 3-6GB
  - name: Tooka-SBERT
    file: |-
      Tooka-SBERT.safetensors
      Tooka-SBERT-V2-Large.safetensors
    role: embeddings
    format: safetensors
    vram_estimate: 0.5-2GB (CPU friendly)
  - name: whisper-persian-v4
    # NOTE(review): two of these lines were previously wrapped in double quotes
    # inside the block scalar; those quotes would become literal characters in
    # the filename, inconsistent with the third line, so they were removed.
    # The embedded " -model" spaces look suspicious — verify against disk.
    file: |-
      whisper-persian-v4 -model-00001-of-00002.safetensors
      whisper-persian-v4 -model-00002-of-00002.safetensors
      whisper-persian-v4 -model.safetensors.index.json
    role: asr (Persian)
    format: safetensors
    vram_estimate: 2-6GB (depending on model size)
  - name: deepseek-ocr-persian
    # NOTE(review): "DeepSeek-OCR-2 .safetensors" contains a space before the
    # extension — presumably the real filename; confirm on disk.
    file: |-
      DeepSeek-OCR-2 .safetensors
      deepseek-ocr-persian.safetensors.index.json
      deepseek-ocr-persian.safetensors
    role: OCR (printed Persian)
    format: safetensors
    vram_estimate: 2-4GB
  - name: qwen3-vl-4b
    file: |-
      Qwen3-VL-4B-Thinking-mmproj-Qwen3VL-4B-Thinking-F16.gguf
      Qwen3-VL-4B-Thinking-Qwen3VL-4B-Thinking-Q4_K_M.gguf
    role: vision-LM (experimental OCR-free extraction)
    format: gguf Q4 / F16
    vram_estimate: 6-8GB
  - name: qwen3-coder-next-awq-shards
    # NOTE(review): only shard 00001-of-00010 is listed — the remaining nine
    # shards are presumably required; confirm whether they should be indexed.
    file: Qwen3-Coder-Next-AWQ-4bit-model-00001-of-00010.safetensors
    role: coder (code/sql generation)
    format: AWQ 4-bit shards
    vram_estimate: 4-8GB (depends on loader)
  - name: hemlock-coder-7b
    file: Hemlock-Coder-7B.i1-Q4_K_M.gguf
    role: coder (code/sql generation)
    format: gguf Q4
    vram_estimate: 4-6GB