-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinput.txt.example
More file actions
136 lines (126 loc) · 3.81 KB
/
Copy pathinput.txt.example
File metadata and controls
136 lines (126 loc) · 3.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# Reference only — this file is not loaded automatically.
# Configure the pipeline via the web UI (Run audit → Settings), or pass a file explicitly:
#   python -m src --config input.txt.example
# Requires DATABASE_URL (PostgreSQL). The UI writes a shadow file to DATA_DIR/pipeline-config.txt.
# Keys match ALL_SCHEMA_KEYS in web/src/lib/pipelineConfigSchema.ts.
# --- Crawl ---
start_url =
max_pages = 20
concurrency = 8
timeout = 12
max_depth = 6
polite_delay = 0.2
ignore_robots = false
allow_external = false
store_outlinks = true
store_content_excerpt = true
content_excerpt_max_chars = 4096
store_page_html = false
max_stored_html_bytes = 2097152
run_content_analysis = false
content_analysis_strategy = main_only
content_analysis_workers = 4
preserve_crawl_history = true
crawl_stream_to_db = false
crawl_exclude_urls =
crawl_discovery_mode = spider
crawl_url_list =
crawl_user_agent_preset = default
crawl_user_agent_custom =
compare_mobile_desktop = false
crawl_auth_username =
crawl_auth_password =
crawl_extra_headers =
crawl_cookies =
crawl_robots_txt_override =
custom_extractors =
# crawl_render_mode: one of static | javascript | auto
#   (auto = static first; a browser is used when SPA heuristics match)
crawl_render_mode = static
crawl_js_concurrency = 3
crawl_js_timeout = 30
crawl_js_wait_until = domcontentloaded
crawl_js_extra_wait_ms = 1500
crawl_js_block_resources = true
crawl_js_capture_console = true
crawl_js_console_levels = error,warning
crawl_js_capture_failed_requests = false
crawl_js_console_max_per_page = 20
# --- Report ---
outbound_domain_max_rows = 200
include_keyword_opportunities = true
site_name = Site
report_title = SEO report
max_fetch_for_edges = 300
same_domain_only = true
max_nodes_plot = 400
run_security_scan = true
security_scan_active = false
security_max_urls_probe = 20
probe_image_inventory = false
max_image_probe_urls = 500
image_probe_concurrency = 6
image_probe_timeout = 8
image_unoptimized_min_kb = 200
enable_subdomain_discovery = true
subdomain_ct_lookup = true
enable_rdap_org_lookup = true
# --- Lighthouse (this section also lists other audit toggles, API keys, and crawl_* keys) ---
lighthouse_url =
lighthouse_mode = navigation
lighthouse_strategy = desktop
lighthouse_categories = performance,accessibility,best-practices,seo
lighthouse_iterations = 1
run_lighthouse = true
run_lighthouse_on_pages = true
enable_crux = false
enable_rich_results_validation = false
google_rich_results_api_key =
enable_axe = false
enable_spell_check = false
enable_html_validation = false
enable_amp_audit = false
enable_wayback_lookup = false
competitor_domains =
bing_webmaster_api_key =
serp_api_key =
export_logo_url =
custom_extraction_regex =
crawl_path_segments =
crawl_ignore_params =
lighthouse_max_pages = 2
lighthouse_concurrency = 2
# --- Content analysis ---
enable_duplicate_detection = true
enable_language_detection = true
analysis_fuzzy_threshold = 92
analysis_simhash_hamming = 0
analysis_simhash_max_urls = 800
analysis_fuzzy_max_urls = 600
analysis_dup_max_pages = 2000
# --- Audit steps ---
run_crawl = true
run_report = true
run_plot = true
# --- Google (GSC & GA4) ---
# Google OAuth: set up in the Integrations panel (gear icon); saved in PostgreSQL (google_app_settings + properties).
enable_google_search_console = false
enable_google_analytics = false
google_date_range_days = 28
google_url_gap_list_limit = 200
# enrich_keywords_after_report: omit for auto, or set to true/false to override the Search Console toggle.
# google_credentials_path: removed — use the Integrations UI and PostgreSQL only.
# --- Keywords: basics ---
keyword_max_pages = 200
keyword_gsc_max_rows = 25000
brand_name =
keyword_seeds =
# --- Keywords: expansion ---
enable_google_suggest = false
enable_google_trends = false
enable_wikipedia_topic = false
enable_datamuse = false
keyword_suggest_top_n = 20
keyword_max_suggest_results = 8
# --- Advanced ---
warning_mapper_input =
warning_mapper_input_type = lighthouse
# AI insights (OpenAI, Gemini, Claude, Ollama): configurable only in the web UI (Run audit → AI tab); see the llm_config table.