@@ -37,6 +37,14 @@ static class PdfBuilder
3737{
3838 private static readonly SearchValues < char > InvalidPathChars = SearchValues . Create ( Path . GetInvalidPathChars ( ) ) ;
3939
/// <summary>
/// A single heading captured from a rendered page, used to build a
/// heading-based PDF table of contents.
/// </summary>
sealed class HeadingInfo
{
    /// <summary>Display text of the heading (first line of the element's trimmed inner text).</summary>
    public string Text { get; init; } = "";

    /// <summary>Value of the heading element's <c>id</c> attribute; used as the link fragment.</summary>
    public string Id { get; init; } = "";

    /// <summary>Heading level taken from the tag name, e.g. 2 for <c>h2</c>.</summary>
    public int Level { get; init; }

    /// <summary>
    /// URL of the page the heading was found on. Marked <c>required</c> so a
    /// missing value is a compile-time error rather than a null hidden behind
    /// a non-nullable type.
    /// </summary>
    public required Uri PageUrl { get; init; }
}
47+
4048 class Outline
4149 {
4250 public string name { get ; init ; } = "" ;
@@ -51,6 +59,9 @@ class Outline
5159
5260 public string ? pdfHeaderTemplate { get ; init ; }
5361 public string ? pdfFooterTemplate { get ; init ; }
62+
63+ public string ? pdfTocSource { get ; init ; }
64+ public int pdfTocHeadingDepth { get ; init ; } = 3 ;
5465 }
5566
5667 public static Task Run ( BuildJsonConfig config , string configDirectory , string ? outputDirectory = null , CancellationToken cancellationToken = default )
@@ -93,6 +104,8 @@ void onSignal(PosixSignalContext context)
93104
94105 Uri ? baseUrl = null ;
95106 var pdfPageNumbers = new ConcurrentDictionary < string , Dictionary < Outline , int > > ( ) ;
107+ var pdfUrlPageNumbers = new ConcurrentDictionary < string , Dictionary < Uri , int > > ( ) ;
108+ var pdfHeadings = new ConcurrentDictionary < string , List < HeadingInfo > > ( ) ;
96109
97110 using var app = builder . Build ( ) ;
98111 app . UseServe ( outputFolder ) ;
@@ -127,6 +140,8 @@ void onSignal(PosixSignalContext context)
127140 await CreatePdf (
128141 PrintPdf , PrintHeaderFooter , task , new ( baseUrl , url ) , toc , outputFolder , pdfOutputPath ,
129142 pageNumbers => pdfPageNumbers [ url ] = pageNumbers ,
143+ urlPageNumbers => pdfUrlPageNumbers [ url ] = urlPageNumbers ,
144+ headings => pdfHeadings [ url ] = headings ,
130145 cancellationToken ) ;
131146
132147 task . Value = task . MaxValue ;
@@ -186,20 +201,23 @@ await CreatePdf(
// Serves the generated table-of-contents HTML for the outline registered under `url`.
IResult TocPage(string url)
{
    // These dictionaries are filled by the callbacks handed to CreatePdf;
    // until an entry exists GetValueOrDefault yields null and the template
    // receives null for that argument.
    var outlinePageNumbers = pdfPageNumbers.GetValueOrDefault(url);
    var pageNumbersByUrl = pdfUrlPageNumbers.GetValueOrDefault(url);
    var extractedHeadings = pdfHeadings.GetValueOrDefault(url);

    var tocUrl = new Uri(baseUrl!, url);
    var tocOutline = pdfTocs[url];
    var html = TocHtmlTemplate(tocUrl, tocOutline, outlinePageNumbers, pageNumbersByUrl, extractedHeadings).ToString();

    return Results.Content(html, "text/html", Encoding.UTF8);
}
191208
192- async Task < byte [ ] ? > PrintPdf ( Outline outline , Uri url )
209+ async Task < ( byte [ ] ? bytes , List < HeadingInfo > headings ) > PrintPdf ( Outline outline , Uri url , int headingDepth )
193210 {
194211 await pageLimiter . WaitAsync ( cancellationToken ) ;
195212 var page = pagePool . TryTake ( out var pooled ) ? pooled : await context . NewPageAsync ( ) ;
213+ var headings = new List < HeadingInfo > ( ) ;
196214
197215 try
198216 {
199217 Uri beforeUri = new ( page . Url ) ;
200218 var response = await page . GotoAsync ( url . ToString ( ) , new ( ) { WaitUntil = WaitUntilState . DOMContentLoaded } ) ;
201219 if ( response ? . Status is 404 )
202- return null ;
220+ return ( null , headings ) ;
203221
204222 bool isSameUrlNavigation = response == null && beforeUri == url ;
205223 bool isHashFragmentNavigation = response == null
@@ -234,11 +252,19 @@ IResult TocPage(string url)
234252 }
235253 }
236254
237- return await page . PdfAsync ( new PagePdfOptions
255+ // Extract headings from the page if needed
256+ if ( outline . pdfTocSource == "headings" && headingDepth > 0 && ! IsTocPage ( url ) && ! IsCoverPage ( url , outputFolder , outline . pdfCoverPage ) )
257+ {
258+ headings = await ExtractHeadingsFromPage ( page , url , headingDepth ) ;
259+ }
260+
261+ var bytes = await page . PdfAsync ( new PagePdfOptions
238262 {
239263 PreferCSSPageSize = true ,
240264 PrintBackground = outline . pdfPrintBackground ,
241265 } ) ;
266+
267+ return ( bytes , headings ) ;
242268 }
243269 finally
244270 {
@@ -247,6 +273,45 @@ IResult TocPage(string url)
247273 }
248274 }
249275
// Collects the headings h1..h{maxDepth} located under `article` or `.content`
// on an already-loaded page, in DOM order.
//
// page     - page to query.
// pageUrl  - URL recorded on every returned heading (and used in the warning).
// maxDepth - deepest heading level to include; values <= 0 yield an empty
//            list, values above 6 are treated as 6.
//
// Extraction is best effort: any failure is logged as a warning and whatever
// was collected up to that point is returned.
async Task<List<HeadingInfo>> ExtractHeadingsFromPage(IPage page, Uri pageUrl, int maxDepth)
{
    var headings = new List<HeadingInfo>();

    // Enumerable.Range throws for a negative count, and a zero count would
    // produce an empty (invalid) selector, so bail out before building it.
    if (maxDepth <= 0)
        return headings;

    // HTML only defines h1..h6; deeper selectors cannot match a real heading.
    var depth = Math.Min(maxDepth, 6);
    var selector = string.Join(",", Enumerable.Range(1, depth).Select(i => $"article h{i}, .content h{i}"));

    try
    {
        var elements = await page.QuerySelectorAllAsync(selector);
        foreach (var element in elements)
        {
            // The selector only matches h1..h6, so the second character of
            // the tag name ("H1".."H6") is the level digit.
            var tagName = await element.EvaluateAsync<string>("e => e.tagName");
            var level = tagName[1] - '0';
            var id = await element.GetAttributeAsync("id") ?? "";
            var text = (await element.InnerTextAsync()).Trim();

            // A TOC entry needs both a link target and a label.
            if (string.IsNullOrEmpty(id) || string.IsNullOrEmpty(text))
                continue;

            // Keep only the first line to drop trailing extras such as source link icons.
            var cleanText = text.Split('\n')[0].Trim();

            headings.Add(new HeadingInfo
            {
                Text = cleanText,
                Id = id,
                Level = level,
                PageUrl = pageUrl,
            });
        }
    }
    catch (Exception ex)
    {
        // Deliberately non-fatal: a page without extractable headings must not abort the PDF build.
        Logger.LogWarning($"Failed to extract headings from {pageUrl}: {ex.Message}");
    }

    return headings;
}
314+
250315 Task < byte [ ] > PrintHeaderFooter ( Outline toc , int pageNumber , int totalPages , Page contentPage )
251316 {
252317 var headerTemplate = ExpandTemplate ( GetHeaderFooter ( toc . pdfHeaderTemplate ) , pageNumber , totalPages ) ;
@@ -333,32 +398,51 @@ static string ExpandTemplate(string? pdfTemplate, int pageNumber, int totalPages
333398 }
334399
335400 static async Task CreatePdf (
336- Func < Outline , Uri , Task < byte [ ] ? > > printPdf , Func < Outline , int , int , Page , Task < byte [ ] > > printHeaderFooter , ProgressTask task ,
337- Uri outlineUrl , Outline outline , string outputFolder , string pdfOutputPath , Action < Dictionary < Outline , int > > updatePageNumbers , CancellationToken cancellationToken )
401+ Func < Outline , Uri , int , Task < ( byte [ ] ? bytes , List < HeadingInfo > headings ) > > printPdf , Func < Outline , int , int , Page , Task < byte [ ] > > printHeaderFooter , ProgressTask task ,
402+ Uri outlineUrl , Outline outline , string outputFolder , string pdfOutputPath , Action < Dictionary < Outline , int > > updatePageNumbers , Action < Dictionary < Uri , int > > updateUrlPageNumbers , Action < List < HeadingInfo > > updateHeadings , CancellationToken cancellationToken )
338403 {
339404 var pages = GetPages ( outline ) . ToArray ( ) ;
340405 if ( pages . Length == 0 )
341406 return ;
342407
343408 var pageBytes = new Dictionary < Outline , byte [ ] > ( ) ;
409+ var pageHeadings = new Dictionary < Outline , List < HeadingInfo > > ( ) ;
344410
345411 // Make progress at 99% before merge PDF
346412 task . MaxValue = pages . Length + ( pages . Length / 99.0 ) ;
347413
348414 await Parallel . ForEachAsync ( pages , new ParallelOptions { CancellationToken = cancellationToken } , async ( item , _ ) =>
349415 {
350416 var ( url , node ) = item ;
351- if ( await printPdf ( outline , url ) is { } bytes )
417+ var result = await printPdf ( outline , url , outline . pdfTocHeadingDepth ) ;
418+ if ( result . bytes is { } bytes )
352419 {
353420 lock ( pageBytes )
354421 pageBytes [ node ] = bytes ;
355422 }
423+ if ( result . headings . Count > 0 )
424+ {
425+ lock ( pageHeadings )
426+ pageHeadings [ node ] = result . headings ;
427+ }
356428 task . Increment ( 1 ) ;
357429 } ) ;
358430
431+ // Collect headings in document order:
432+ // - Page order: preserved by iterating `pages` array (parallel processing loses this)
433+ // - Within-page order: preserved by DOM order from QuerySelectorAllAsync
434+ var allHeadings = pages
435+ . Where ( p => pageHeadings . ContainsKey ( p . node ) )
436+ . SelectMany ( p => pageHeadings [ p . node ] )
437+ . ToList ( ) ;
438+
439+ // Update headings before page numbers are calculated
440+ updateHeadings ( allHeadings ) ;
441+
359442 var pagesByNode = pages . ToDictionary ( p => p . node ) ;
360443 var pagesByUrl = new Dictionary < Uri , List < ( Outline node , NamedDestinations namedDests ) > > ( ) ;
361444 var pageNumbers = new Dictionary < Outline , int > ( ) ;
445+ var urlPageNumbers = new Dictionary < Uri , int > ( ) ;
362446 var numberOfPages = 0 ;
363447
364448 foreach ( var ( url , node ) in pages )
@@ -379,6 +463,7 @@ static async Task CreatePdf(
379463
380464 pageBytes [ node ] = bytes ;
381465 pageNumbers [ node ] = numberOfPages + 1 ;
466+ urlPageNumbers [ CleanUrl ( url ) ] = numberOfPages + 1 ;
382467 numberOfPages += document . NumberOfPages ;
383468 }
384469
@@ -444,7 +529,10 @@ async Task MergePdf()
444529 {
445530 // Refresh TOC page numbers
446531 updatePageNumbers ( pageNumbers ) ;
447- bytes = await printPdf ( outline , url ) ;
532+ updateUrlPageNumbers ( urlPageNumbers ) ;
533+ updateHeadings ( allHeadings ) ;
534+ var result = await printPdf ( outline , url , 0 ) ; // 0 = don't extract headings from TOC page
535+ bytes = result . bytes ;
448536
449537 if ( bytes == null )
450538 continue ;
@@ -607,8 +695,40 @@ IEnumerable<BookmarkNode> CreateBookmarksCore(Outline[]? items, int level)
607695 }
608696 }
609697
610- static HtmlTemplate TocHtmlTemplate ( Uri baseUrl , Outline node , Dictionary < Outline , int > ? pageNumbers )
698+ static HtmlTemplate TocHtmlTemplate ( Uri baseUrl , Outline node , Dictionary < Outline , int > ? pageNumbers , Dictionary < Uri , int > ? urlPageNumbers , List < HeadingInfo > ? headings )
611699 {
700+ // If pdfTocSource is "headings" and we have headings, generate TOC from headings
701+ if ( node . pdfTocSource == "headings" && headings is { Count : > 0 } )
702+ {
703+ var headingTocContent = BuildHeadingToc ( baseUrl , headings , urlPageNumbers ) ;
704+ var cssStyles = Html ( $ """
705+ <style>
706+ /* Indentation for heading levels */
707+ li[data-level="1"] { "{ padding-left: 0; }" }
708+ li[data-level="2"] { "{ padding-left: 1.5em; }" }
709+ li[data-level="3"] { "{ padding-left: 3em; }" }
710+ li[data-level="4"] { "{ padding-left: 4.5em; }" }
711+ li[data-level="5"] { "{ padding-left: 6em; }" }
712+ li[data-level="6"] { "{ padding-left: 7.5em; }" }
713+ </style>
714+ """ ) ;
715+ return Html ( $ """
716+ <!DOCTYPE html>
717+ <html>
718+ <head>
719+ <link rel="stylesheet" href="/public/docfx.min.css">
720+ <link rel="stylesheet" href="/public/main.css">
721+ { cssStyles }
722+ </head>
723+ <body class="pdftoc">
724+ <h1>Table of Contents</h1>
725+ <ul>{ headingTocContent } </ul>
726+ </body>
727+ </html>
728+ """ ) ; ;
729+ }
730+
731+ // Default: generate TOC from toc.yml structure
612732 return Html ( $ """
613733 <!DOCTYPE html>
614734 <html>
@@ -637,6 +757,35 @@ static HtmlTemplate TocHtmlTemplate(Uri baseUrl, Outline node, Dictionary<Outlin
637757 """ ) ;
638758 }
639759
/// <summary>
/// Renders one <c>&lt;li&gt;</c> per heading as a flat list. Hierarchy is not
/// expressed through nesting; each item carries a <c>data-level</c> attribute
/// so that CSS can indent it.
/// </summary>
/// <param name="baseUrl">Not used by this method.</param>
/// <param name="headings">Headings to list, in the order they should appear.</param>
/// <param name="urlPageNumbers">
/// Optional map from a page URL (query and fragment removed) to a page number.
/// When it is null or has no entry for a heading's page, no page number is rendered.
/// </param>
/// <returns>The concatenated list items.</returns>
static HtmlTemplate BuildHeadingToc(Uri baseUrl, List<HeadingInfo> headings, Dictionary<Uri, int>? urlPageNumbers)
{
    List<HtmlTemplate> entries = headings.Select(RenderEntry).ToList();
    return Html($"{entries}");

    // Produces the <li> for a single heading.
    HtmlTemplate RenderEntry(HeadingInfo heading)
    {
        // Link straight to the heading anchor on its page.
        var href = new UriBuilder(heading.PageUrl) { Fragment = heading.Id }.Uri;

        // Page numbers are looked up by page URL without query/fragment, so every
        // heading on a page shows the number stored for that page.
        var cleanUrl = new UriBuilder(heading.PageUrl) { Query = null, Fragment = null }.Uri;

        var pageNumberHtml = urlPageNumbers is not null && urlPageNumbers.TryGetValue(cleanUrl, out var pageNum)
            ? Html($"<span class='spacer'></span> <span class='page-number'>{pageNum}</span>")
            : default;

        // NOTE(review): if Html(...) already encodes interpolated strings, the explicit
        // HtmlEncode below encodes the text twice; confirm against HtmlTemplate.
        return Html($"""
            <li data-level='{heading.Level}'>
                <a href='{href}'>{System.Web.HttpUtility.HtmlEncode(heading.Text)}
                    {pageNumberHtml}
                </a>
            </li>
            """);
    }
}
788+
640789 /// <summary>
641790 /// Adds hidden links to headings to ensure Chromium saves heading anchors to named dests
642791 /// for cross page bookmark reference.
@@ -726,4 +875,19 @@ private static StringComparison GetStringComparison()
726875 ? StringComparison . OrdinalIgnoreCase
727876 : StringComparison . Ordinal ;
728877 }
878+
/// <summary>
/// Returns true when the URL's absolute path starts with <c>/_pdftoc/</c>.
/// The comparison is ordinal: a URL path is an identifier, so it must not
/// depend on the current culture.
/// </summary>
private static bool IsTocPage(Uri url) => url.AbsolutePath.StartsWith("/_pdftoc/", StringComparison.Ordinal);
880+
/// <summary>
/// Determines whether <paramref name="pageUri"/> addresses the configured PDF cover page.
/// </summary>
/// <param name="pageUri">Absolute URL of the page being printed.</param>
/// <param name="baseFolder">Fully qualified folder that <paramref name="pdfCoverPage"/> is resolved against.</param>
/// <param name="pdfCoverPage">Cover page path from the outline; null or empty means there is no cover page.</param>
/// <returns>
/// True when the URL path (leading slashes removed) equals the cover page path
/// relative to <paramref name="baseFolder"/>, compared with <c>GetStringComparison()</c>.
/// </returns>
private static bool IsCoverPage(Uri pageUri, string baseFolder, string? pdfCoverPage)
{
    Debug.Assert(Path.IsPathFullyQualified(baseFolder));

    // No cover page configured: nothing can match.
    if (string.IsNullOrEmpty(pdfCoverPage))
    {
        return false;
    }

    // NOTE(review): AbsolutePath is URL-escaped; presumably cover page paths contain
    // no characters that need escaping. Confirm against MakeRelativePath's output.
    var requestedPath = pageUri.AbsolutePath.TrimStart('/');

    var coverFullPath = Path.GetFullPath(Path.Combine(baseFolder, pdfCoverPage));
    var coverPagePath = PathUtility.MakeRelativePath(baseFolder, coverFullPath);

    return string.Equals(requestedPath, coverPagePath, GetStringComparison());
}
729893}
0 commit comments