Skip to content

Commit f84eb67

Browse files
committed
URL pattern detect
1 parent a0dbaea commit f84eb67

14 files changed

Lines changed: 150 additions & 60 deletions

File tree

ElectronJS/config.json

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1 @@
1-
{
2-
"webserver_address": "http://localhost",
3-
"webserver_port": 8074,
4-
"user_data_folder": "./user_data",
5-
"debug": true,
6-
"absolute_user_data_folder": "D:\\Document\\Projects\\EasySpider\\ElectronJS\\user_data"
7-
}
1+
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":true,"absolute_user_data_folder":"D:\\Document\\Projects\\EasySpider\\ElectronJS\\user_data"}

ElectronJS/main.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,7 @@ function handleOpenBrowser(event, lang = "en", user_data_folder = "", mobile = f
457457
runBrowser(lang, user_data_folder, mobile);
458458
let size = screen.getPrimaryDisplay().workAreaSize;
459459
let width = parseInt(size.width);
460-
let height = parseInt(size.height * 0.65);
460+
let height = parseInt(size.height * 0.6);
461461
flowchart_window = new BrowserWindow({
462462
x: 0,
463463
y: 0,

ElectronJS/src/taskGrid/FlowChart_CN.html

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -513,25 +513,33 @@ <h4 class="modal-title">等价XPath</h4>
513513
<h4 class="modal-title" id="myModalLabel">保存任务</h4>
514514
<button type="button" class="close" data-dismiss="modal" aria-hidden="true">&times;</button>
515515
</div>
516-
<div class="modal-body">
516+
<div class="modal-body" style="height:400px;overflow: auto">
517517
<input onkeydown="inputDelete(event)" id="serviceId" type="hidden" name="serviceId" value="-1"></input>
518518
<input onkeydown="inputDelete(event)" id="url" type="hidden" name="url" value="about:blank"></input>
519519
<label>任务名称:</label>
520520
<input onkeydown="inputDelete(event)" required name="serviceName" value="新web采集任务" id="serviceName" class="form-control"></input>
521521
<label>任务描述:</label>
522522
<input onkeydown="inputDelete(event)" id="serviceDescription" name="serviceDescription" class="form-control"></input>
523-
<label>每采集多少条数据保存一次(值越大采集速度越快,但如果意外退出则有数据丢失风险):</label>
524-
<input onkeydown="inputDelete(event)" type="number" value="10" id="saveThreshold" name="saveThreshold" class="form-control"></input>
525-
<label>是否为cloudflare等极端反爬网站:</label>
526-
<select id="cloudflare" name="cloudflare" class="form-control">
527-
<option value = 0></option>
528-
<option value = 1></option>
523+
<label>导出数据格式:</label>
524+
<select id="outputFormat" class="form-control">
525+
<option value = "csv">CSV</option>
526+
<option value = "xlsx">XLSX(EXCEL)</option>
527+
<option value = "mysql">MySQL</option>
529528
</select>
530529
<label>浏览器模拟类型:</label>
531530
<select id="environment" name="environment" class="form-control">
532531
<option value = 0>电脑端</option>
533532
<option value = 1>手机端(Cloudflare模式下不支持)</option>
534533
</select>
534+
<label>是否为cloudflare等极端反爬网站:</label>
535+
<select id="cloudflare" name="cloudflare" class="form-control">
536+
<option value = 0></option>
537+
<option value = 1></option>
538+
</select>
539+
<label>每采集多少条数据保存一次(值越大采集速度越快,但如果意外退出则有数据丢失风险):</label>
540+
<input onkeydown="inputDelete(event)" type="number" value="10" id="saveThreshold" name="saveThreshold" class="form-control"></input>
541+
<label>控制台预览时数据最大显示长度:</label>
542+
<input onkeydown="inputDelete(event)" type="number" value="15" id="maxViewLength" class="form-control"></input>
535543

536544
</div>
537545
<div class="modal-footer">

ElectronJS/src/taskGrid/logic_CN.js

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,8 @@ function saveService(type) {
427427
"saveThreshold": saveThreshold,
428428
"cloudflare": cloudflare,
429429
"environment": environment,
430+
"maxViewLength": parseInt($("#maxViewLength").val()),
431+
"outputFormat": $("#outputFormat").val(),
430432
"containJudge": containJudge,
431433
"desc": serviceDescription,
432434
"inputParameters": inputParameters,
@@ -460,10 +462,24 @@ if (sId != null && sId != -1) //加载任务
460462
$.get(backEndAddressServiceWrapper + "/queryTask?id=" + sId, function(result) {
461463
nodeList = result["graph"];
462464
app.$data.list.nl = nodeList;
465+
for(let node of nodeList){ //兼容旧版本
466+
if(node["option"] == 1){
467+
if(!("cookies" in node["parameters"])) {
468+
node["parameters"]["cookies"] = "";
469+
}
470+
}
471+
}
463472
$("#serviceName").val(result["name"]);
464473
$("#serviceId").val(result["id"]);
465474
$("#url").val(result["url"]);
466475
$("#serviceDescription").val(result["desc"]);
476+
for(let key of Object.keys(result)){
477+
try{
478+
$("#"+key).val(result[key]);
479+
} catch(e){
480+
console.log(e);
481+
}
482+
}
467483
refresh();
468484
});
469485
} else {

ElectronJS/tasks/1.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

ElectronJS/tasks/112.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

ElectronJS/tasks/142.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"id":142,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/6/2023, 3:38:35 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"/手机/数码"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":"test=123\nipLoc-djd=53283-53456-0-0\nareaId=53283\nmba_sid=16885856346417163685425076773.0\n__jdc=122270672\n__jdb=122270672.1.16885856346381587112207|1.1688585634\nmba_muid=16885856346381587112207\n__jdv=122270672%7Clocalhost%3A8074%7C-%7Creferral%7C-%7C1688585634639\n__jda=122270672.16885856346381587112207.1688585634.1688585634.1688585634.1"}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"/手机/数码"}],"unique_index":"p2h2i1dva8ljq4aje2","iframe":false,"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
1+
{"id":142,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/6/2023, 4:08:31 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"/手机/数码"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":"test=123\nipLoc-djd=53283-53456-0-0\nareaId=53283\nmba_sid=16885856346417163685425076773.0\n__jdc=122270672\n__jdb=122270672.1.16885856346381587112207|1.1688585634\nmba_muid=16885856346381587112207\n__jdv=122270672%7Clocalhost%3A8074%7C-%7Creferral%7C-%7C1688585634639\n__jda=122270672.16885856346381587112207.1688585634.1688585634.1688585634.1"}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"/手机/数码"}],"unique_index":"p2h2i1dva8ljq4aje2","iframe":false,"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}

ElectronJS/tasks/143.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"id":143,"name":"中国知网","url":"https://chn.oversea.cnki.net/index/","links":"https://chn.oversea.cnki.net/index/","create_time":"7/6/2023, 4:50:52 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"csv","containJudge":false,"desc":"https://chn.oversea.cnki.net/index/","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://chn.oversea.cnki.net/index/","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://chn.oversea.cnki.net/index/"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://chn.oversea.cnki.net/index/","links":"https://chn.oversea.cnki.net/index/","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}}]}

0 commit comments

Comments
 (0)