|
| 1 | +from pathlib import Path |
| 2 | +from bs4 import BeautifulSoup |
| 3 | +from rich.table import Table |
| 4 | +from rich.tree import Tree |
| 5 | +from rich.panel import Panel |
| 6 | +from collections import defaultdict |
| 7 | +import re |
| 8 | + |
def inspect_workflow(workflow_file, source_dir, output_json, console):
    """Inspect a workflow file, emitting either JSON or Rich console output.

    Args:
        workflow_file: Location of the workflow file; converted to a
            ``pathlib.Path`` before being handed on.
        source_dir: Passed through unchanged to the selected inspector.
        output_json: When truthy, the JSON inspector is used and ``console``
            is ignored; otherwise the Rich inspector is used.
        console: Passed to the Rich inspector only.

    Returns:
        Whatever ``_inspect_json`` returns in JSON mode, otherwise ``None``.
    """
    path = Path(workflow_file)

    if not output_json:
        _inspect_rich(path, source_dir, console)
        return None

    return _inspect_json(path, source_dir)
| 16 | + |
# File suffix -> language name shown in the overview tree and node table.
_RICH_LANGUAGE_BY_SUFFIX = {
    '.py': 'Python',
    '.m': 'MATLAB',
    '.java': 'Java',
    '.cpp': 'C++',
    '.hpp': 'C++',
    '.v': 'Verilog',
}

# Edge labels starting with ``0x<hex>_<name>`` are reported as ZMQ edges.
_RICH_ZMQ_LABEL_RE = re.compile(r"0x([a-fA-F0-9]+)_(\S+)")

# Maximum number of nodes / edges considered for the detail tables.
_RICH_TABLE_LIMIT = 10

# Maximum number of missing files listed individually in the overview tree.
_RICH_MISSING_LIMIT = 5


def _rich_escape(text):
    """Return *text* escaped so Rich prints it literally, not as markup."""
    # Imported here so the block does not depend on a new file-level import.
    from rich.markup import escape

    return escape(str(text))


def _rich_node_entry(node, src_dir):
    """Return ``(node_id, filename, language, exists)`` for *node*, or ``None``.

    ``None`` means the node has no ``y:NodeLabel`` text, or its label does not
    contain a ``:`` separating the id from the file name.
    """
    label_tag = node.find('y:NodeLabel')
    if label_tag is None or not label_tag.text:
        return None

    label = label_tag.text.strip()
    if ':' not in label:
        return None

    node_id, filename = label.split(':', 1)
    language = _RICH_LANGUAGE_BY_SUFFIX.get(Path(filename).suffix, 'Other')
    return node_id, filename, language, (src_dir / filename).exists()


def _rich_edge_is_zmq(edge):
    """Return ``True`` when the edge's ``y:EdgeLabel`` text matches the ZMQ pattern."""
    label_tag = edge.find('y:EdgeLabel')
    if label_tag is None or not label_tag.text:
        return False
    return _RICH_ZMQ_LABEL_RE.match(label_tag.text.strip()) is not None


def _rich_overview_tree(node_total, node_entries, edge_is_zmq):
    """Build the overview tree: node/edge counts, communication type, missing files."""
    tree = Tree("📊 [bold]Workflow Overview[/bold]")
    labelled = [entry for entry in node_entries if entry is not None]

    lang_counts = defaultdict(int)
    for _, _, language, _ in labelled:
        lang_counts[language] += 1

    nodes_branch = tree.add(f"Nodes: [bold]{node_total}[/bold]")
    # Most frequent language first; ties keep first-seen order (stable sort).
    for language, count in sorted(lang_counts.items(), key=lambda item: -item[1]):
        nodes_branch.add(f"{language}: {count}")

    zmq_count = sum(edge_is_zmq)
    file_count = len(edge_is_zmq) - zmq_count

    edges_branch = tree.add(f"Edges: [bold]{len(edge_is_zmq)}[/bold]")
    if zmq_count > 0:
        edges_branch.add(f"ZMQ: {zmq_count}")
    if file_count > 0:
        edges_branch.add(f"File-based: {file_count}")

    if zmq_count > 0:
        comm_type = "ZMQ (0mq)"
    elif file_count > 0:
        comm_type = "File-based"
    else:
        comm_type = "None"
    tree.add(f"Communication: [bold]{comm_type}[/bold]")

    missing_files = [filename for _, filename, _, exists in labelled if not exists]
    if missing_files:
        missing_branch = tree.add(f"[yellow]Missing files: {len(missing_files)}[/yellow]")
        for filename in missing_files[:_RICH_MISSING_LIMIT]:
            missing_branch.add(f"[yellow]{_rich_escape(filename)}[/yellow]")
        if len(missing_files) > _RICH_MISSING_LIMIT:
            hidden = len(missing_files) - _RICH_MISSING_LIMIT
            missing_branch.add(f"[dim]...and {hidden} more[/dim]")

    return tree


def _rich_node_table(node_entries):
    """Build the "Node Details" table from the first ``_RICH_TABLE_LIMIT`` nodes."""
    table = Table(title="Node Details", show_header=True, header_style="bold magenta")
    table.add_column("ID", style="cyan", width=12)
    table.add_column("File", style="white")
    table.add_column("Language", style="green")
    table.add_column("Status", style="yellow")

    shown = 0
    for entry in node_entries[:_RICH_TABLE_LIMIT]:
        if entry is None:
            # Node without a usable "id:filename" label: nothing to display.
            continue
        node_id, filename, language, exists = entry
        table.add_row(
            _rich_escape(node_id),
            _rich_escape(filename),
            language,
            "✓" if exists else "✗",
        )
        shown += 1

    if len(node_entries) > _RICH_TABLE_LIMIT:
        # Report the rows actually added; skipped unlabeled nodes can make
        # this smaller than the limit.
        table.caption = f"Showing {shown} of {len(node_entries)} nodes"

    return table


def _rich_edge_table(edges, edge_is_zmq):
    """Build the "Edge Connections" table from the first ``_RICH_TABLE_LIMIT`` edges."""
    table = Table(title="Edge Connections", show_header=True, header_style="bold magenta")
    table.add_column("From", style="cyan", width=12)
    table.add_column("To", style="cyan", width=12)
    table.add_column("Type", style="green")

    for edge, is_zmq in zip(edges[:_RICH_TABLE_LIMIT], edge_is_zmq):
        table.add_row(
            _rich_escape(edge.get('source', 'unknown')),
            _rich_escape(edge.get('target', 'unknown')),
            "ZMQ" if is_zmq else "File",
        )

    if len(edges) > _RICH_TABLE_LIMIT:
        table.caption = f"Showing {_RICH_TABLE_LIMIT} of {len(edges)} edges"

    return table


def _inspect_rich(workflow_path, source_dir, console):
    """Print a Rich-formatted summary of a GraphML workflow to *console*.

    The output is the workflow file name, an overview tree (node count with a
    per-language breakdown, edge count split into ZMQ and file-based, the
    communication type and up to five missing source files), then detail
    tables built from the first ten nodes and the first ten edges.

    Text taken from the file system or the workflow (names, ids, error
    messages) is escaped before printing so that square brackets in it are
    not interpreted as Rich markup.

    Args:
        workflow_path: ``pathlib.Path`` of the GraphML file.
        source_dir: Directory, joined onto the workflow's parent directory,
            in which each node's source file is looked up.
        console: Object whose ``print`` method receives the output
            (presumably a ``rich.console.Console`` -- confirm with caller).

    Returns:
        None. Failures are reported on *console* instead of being raised.
    """
    console.print()
    console.print(f"[bold cyan]Workflow:[/bold cyan] {_rich_escape(workflow_path.name)}")
    console.print()

    try:
        # Read raw bytes so the parser honours the encoding declared in the
        # XML prolog instead of decoding with the platform default.
        content = workflow_path.read_bytes()
        soup = BeautifulSoup(content, 'xml')

        if not soup.find('graphml'):
            console.print("[red]Not a valid GraphML file[/red]")
            return

        nodes = soup.find_all('node')
        edges = soup.find_all('edge')
        src_dir = workflow_path.parent / source_dir

        # Parse every node and edge once; the tree and both tables reuse this.
        node_entries = [_rich_node_entry(node, src_dir) for node in nodes]
        edge_is_zmq = [_rich_edge_is_zmq(edge) for edge in edges]

        console.print(_rich_overview_tree(len(nodes), node_entries, edge_is_zmq))
        console.print()

        if nodes:
            console.print(_rich_node_table(node_entries))
            console.print()

        if edges:
            console.print(_rich_edge_table(edges, edge_is_zmq))
            console.print()

    except FileNotFoundError:
        console.print(f"[red]File not found:[/red] {_rich_escape(workflow_path)}")
    except Exception as e:
        # Reporting boundary: any other failure is shown to the user rather
        # than propagated.
        console.print(f"[red]Inspection failed:[/red] {_rich_escape(e)}")
| 168 | + |
# File suffix -> language key used in the JSON report.
_JSON_LANGUAGE_BY_SUFFIX = {
    '.py': 'python',
    '.m': 'matlab',
    '.java': 'java',
    '.cpp': 'cpp',
    '.hpp': 'cpp',
    '.v': 'verilog',
}

# Edge labels starting with ``0x<hex>_<name>`` are reported as ZMQ edges.
_JSON_ZMQ_LABEL_RE = re.compile(r"0x([a-fA-F0-9]+)_(\S+)")


def _inspect_json(workflow_path, source_dir):
    """Print a JSON summary of a GraphML workflow to standard output.

    On success the printed object has the keys ``workflow`` (file name),
    ``nodes`` (``total``, ``by_language``, ``list``), ``edges`` (``total``,
    ``zmq``, ``file``, ``list``) and ``missing_files``. Only nodes whose
    ``y:NodeLabel`` text has the form ``id:filename`` appear in
    ``nodes.list`` and ``by_language``; ``nodes.total`` counts every
    ``node`` element.

    If the document has no ``graphml`` element, or any exception is raised
    while reading or analysing the file, an object with a single ``error``
    key is printed instead.

    Args:
        workflow_path: ``pathlib.Path`` of the GraphML file.
        source_dir: Directory, joined onto the workflow's parent directory,
            in which each node's source file is looked up.

    Returns:
        None. All results are written with ``print``.
    """
    import json

    try:
        # Read raw bytes so the parser honours the encoding declared in the
        # XML prolog instead of decoding with the platform default.
        content = workflow_path.read_bytes()
        soup = BeautifulSoup(content, 'xml')

        if not soup.find('graphml'):
            print(json.dumps({'error': 'Not a valid GraphML file'}, indent=2))
            return

        nodes = soup.find_all('node')
        edges = soup.find_all('edge')
        src_dir = workflow_path.parent / source_dir

        lang_counts = defaultdict(int)
        node_list = []
        missing_files = []

        for node in nodes:
            label_tag = node.find('y:NodeLabel')
            if label_tag is None or not label_tag.text:
                continue
            label = label_tag.text.strip()
            if ':' not in label:
                continue

            node_id, filename = label.split(':', 1)
            lang = _JSON_LANGUAGE_BY_SUFFIX.get(Path(filename).suffix, 'other')
            lang_counts[lang] += 1

            exists = (src_dir / filename).exists()
            if not exists:
                missing_files.append(filename)

            node_list.append({
                'id': node_id,
                'file': filename,
                'language': lang,
                'exists': exists,
            })

        edge_list = []
        zmq_count = 0

        for edge in edges:
            label_tag = edge.find('y:EdgeLabel')
            if label_tag is not None and label_tag.text:
                label_text = label_tag.text.strip()
            else:
                label_text = ""
            is_zmq = _JSON_ZMQ_LABEL_RE.match(label_text) is not None
            if is_zmq:
                zmq_count += 1

            edge_list.append({
                'source': edge.get('source'),
                'target': edge.get('target'),
                'type': 'zmq' if is_zmq else 'file',
            })

        # Every edge that is not ZMQ is counted as file-based.
        file_count = len(edge_list) - zmq_count

        result = {
            'workflow': str(workflow_path.name),
            'nodes': {
                'total': len(nodes),
                'by_language': dict(lang_counts),
                'list': node_list,
            },
            'edges': {
                'total': len(edges),
                'zmq': zmq_count,
                'file': file_count,
                'list': edge_list,
            },
            'missing_files': missing_files,
        }

        print(json.dumps(result, indent=2))

    except Exception as e:
        # Reporting boundary: failures are emitted as JSON so that consumers
        # of the output always receive a parseable document.
        print(json.dumps({'error': str(e)}, indent=2))
0 commit comments