Skip to content

Commit 5aeb7d6

Browse files
committed
refine list core job
1 parent eece0b3 commit 5aeb7d6

5 files changed

Lines changed: 494 additions & 518 deletions

File tree

lib/jobs/list_core.js

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
'use strict';
2+
3+
const os = require('os');
4+
const fs = require('fs');
5+
const fsx = require('fs').promises;
6+
const path = require('path');
7+
const utils = require('../utils');
8+
9+
// Reporting period in milliseconds (60 * 1000 ms = 1 minute). Exposed as
// ListCoreJob.reportInterval and also used by findCores() as the maximum age
// (by ctime) of a core file that is still reported.
const REPORT_INTERVAL = 60 * 1000;
10+
11+
/**
 * Decide whether a directory entry name looks like a core file.
 *
 * If the user renamed core files, several prefixes may be in play; a name
 * qualifies when it starts with at least one of them. Names ending in `.gz`
 * are always rejected.
 *
 * @param {string[]} prefixList - accepted file-name prefixes
 * @param {string} item - file name (not a full path) to test
 * @returns {boolean} true when `item` starts with one of the prefixes and
 *   does not end with `.gz`
 */
function check(prefixList, item) {
  // The `.gz` test does not depend on the prefix, so it is evaluated once,
  // and only after some prefix has matched.
  return prefixList.some((prefix) => item.startsWith(prefix)) && !item.endsWith('.gz');
}
18+
19+
class ListCoreJob {
  /**
   * Collect the directories and file-name prefixes used to look for core
   * files. Core files are searched for in the following locations:
   *
   * 1. Directories given by the user in config.json:
   *    `coredir: [dir1, dir2, ...]` (a single string is accepted as well).
   * 2. The directory named by /proc/sys/kernel/core_pattern, which controls
   *    where core files are written and how they are named. For example
   *      echo "/opt/corefile/core-%e-%p-%t" > /proc/sys/kernel/core_pattern
   *    makes the kernel store core files under /opt/corefile.
   *    Format specifiers:
   *      %p - insert pid into filename
   *      %u - insert current uid into filename
   *      %g - insert current gid into filename
   *      %s - insert signal that caused the coredump into the filename
   *      %t - insert UNIX time that the coredump occurred into filename
   *      %h - insert hostname where the coredump happened into filename
   *      %e - insert coredumping executable name into filename
   *    Note: only patterns of this form are handled; patterns that hand the
   *    dump over to a third-party program are not considered.
   * 3. The PWD directory of every running Node process (gathered in run()).
   *    The original note also mentions /cores on Mac, but run() currently
   *    skips every platform other than Linux.
   *
   * @param {object} [config] - agent configuration
   * @param {string|string[]} [config.coredir] - extra directories to scan
   */
  constructor(config) {
    this.coreFileNamePrefix = ['core'];

    const configured = config ? config.coredir : undefined;
    if (Array.isArray(configured)) {
      // Copy the list: a directory may be appended below, and that must not
      // leak into the caller's config object.
      this.coredir = [...configured];
    } else if (typeof configured === 'string' && configured !== '') {
      this.coredir = [configured];
    } else {
      // missing value or unsupported type: ignore it
      this.coredir = [];
    }

    // The remainder only applies to Linux: honour the core directory named
    // by /proc/sys/kernel/core_pattern, e.g. '/tmp/core_%e.%p'.
    if (os.platform() !== 'linux') {
      return;
    }
    const patternFile = '/proc/sys/kernel/core_pattern';
    if (!fs.existsSync(patternFile)) {
      return;
    }

    // Only the first space-separated token is treated as the path template.
    const patt = fs.readFileSync(patternFile, 'utf8').trim().split(' ')[0];
    // Requires a '%' specifier that is not the very first character; a token
    // without one (or starting with one) is skipped.
    if (patt.indexOf('%') <= 0) {
      return;
    }

    // e.g. /tmp/core_%e.%p -> dir '/tmp', name 'core_%e'
    const parsed = path.parse(patt);
    const coredir_ = parsed.dir;
    if (!fs.existsSync(coredir_)) {
      return;
    }
    try {
      fs.accessSync(coredir_, fs.constants.R_OK);
    } catch (e) {
      console.log(`${coredir_} is unaccessible: ${e.message}`);
      return;
    }

    this.coredir.push(coredir_);
    // Literal text in front of the first specifier becomes an extra prefix.
    // NOTE(review): a file name that starts with a specifier (e.g.
    // '/cores/%e.core') yields an empty prefix, which makes check() accept
    // every non-.gz file in every scanned directory - confirm this is wanted.
    const prefix = parsed.name.split('%')[0];
    if (!this.coreFileNamePrefix.includes(prefix)) {
      this.coreFileNamePrefix.push(prefix);
    }
  }

  /**
   * List the pids of processes whose `ps` line contains "node ".
   *
   * NOTE(review): when grep matches nothing the pipeline exits non-zero;
   * whether utils.execAsync rejects in that case is not visible here.
   *
   * @returns {Promise<string[]>} pids as strings, in `ps` output order
   */
  async getNodePids() {
    const { stdout } = await utils.execAsync('ps -e -o pid,args | grep -E "node " | grep -v grep');
    const pids = [];
    for (const line of stdout.toString().split('\n')) {
      // ps usually pads the pid column with leading spaces, so every line is
      // trimmed on its own before the first field is taken.
      const pid = line.trim().split(/\s+/)[0];
      if (pid) {
        pids.push(pid);
      }
    }
    return pids;
  }

  /**
   * Read the PWD environment variable of a process from /proc/<pid>/environ.
   *
   * @param {string|number} pid - process id
   * @returns {Promise<string|null>} the PWD value, or null when the environ
   *   file cannot be read or carries no PWD entry
   */
  async getNodePWD(pid) {
    const environFile = `/proc/${pid}/environ`;
    let env;
    try {
      env = await fsx.readFile(environFile, 'utf8');
    } catch (err) {
      // Unreadable, or the process went away in the meantime: ignore it.
      return null;
    }

    // Entries are NUL-separated "NAME=value" pairs.
    for (const entry of env.split('\u0000')) {
      // Match the exact name and keep everything after the first '=' so that
      // variables like PWDX and values containing '=' are handled correctly.
      if (entry.startsWith('PWD=')) {
        return entry.slice('PWD='.length);
      }
    }

    return null;
  }

  /**
   * Find recent core files directly inside a directory (no recursion).
   *
   * @param {string} dir - directory to scan
   * @returns {Promise<Array<{path: string, size: number, ctime: Date}>>}
   *   matching files; empty when the directory cannot be listed
   */
  async findCores(dir) {
    const results = [];
    let files;
    try {
      files = await fsx.readdir(dir);
    } catch (ex) {
      // Missing, unreadable or not a directory: nothing to report.
      return results;
    }

    const oldestAllowed = Date.now() - REPORT_INTERVAL;
    for (const name of files) {
      if (!check(this.coreFileNamePrefix, name)) {
        continue;
      }
      const file = path.join(dir, name);
      let stat;
      try {
        stat = await fsx.stat(file);
      } catch (ex) {
        // The entry vanished (or became unreadable) after readdir: skip it.
        continue;
      }
      // bypass directories and other non-regular entries
      if (!stat.isFile()) {
        continue;
      }

      // Bypass files whose ctime is older than one report interval -
      // presumably so that a core is only reported right after it appears.
      if (stat.ctimeMs < oldestAllowed) {
        continue;
      }

      results.push({
        path: file,
        size: stat.size,
        ctime: stat.ctime
      });
    }

    return results;
  }

  /**
   * Scan all known directories and build the coredump report.
   *
   * @returns {Promise<{type: string, metrics: Array}|null>} the report, or
   *   null on non-Linux platforms and when there is no directory to scan
   */
  async run() {
    // Not Linux: do nothing and report nothing.
    if (os.platform() !== 'linux') {
      return null;
    }

    // Find the pids of the currently running Node processes.
    const pids = await this.getNodePids();
    const pwds = [];
    for (const pid of pids) {
      // Look up the PWD directory of each process.
      const pwd = await this.getNodePWD(pid);
      if (pwd) {
        pwds.push(pwd);
      }
    }
    // Merge the directory lists and drop duplicates.
    const dirs = Array.from(new Set([...this.coredir, ...pwds]));
    if (dirs.length === 0) {
      return null;
    }

    // Collect the matching core files from every directory.
    const cores = [];
    for (const dir of dirs) {
      const list = await this.findCores(dir);
      cores.push(...list);
    }

    return {
      type: 'coredump',
      metrics: cores
    };
  }

  static reportInterval = REPORT_INTERVAL; // 1 min
}
187+
188+
module.exports = ListCoreJob;

0 commit comments

Comments
 (0)