@@ -430,11 +430,13 @@ func New(options *Options) (*Runner, error) {
430430 }
431431
432432 runner .simHashes = gcache.New [uint64 , struct {}](1000 ).ARC ().Build ()
433- ditClassifier , err := dit .New ()
434- if err != nil {
435- gologger .Warning ().Msgf ("Could not initialize page classifier: %s" , err )
433+ if options .JSONOutput || options .CSVOutput || len (options .OutputFilterPageType ) > 0 {
434+ ditClassifier , err := dit .New ()
435+ if err != nil {
436+ gologger .Warning ().Msgf ("Could not initialize page classifier: %s" , err )
437+ }
438+ runner .ditClassifier = ditClassifier
436439 }
437- runner .ditClassifier = ditClassifier
438440
439441 if options .SecretFile != "" {
440442 authProviderOpts := & authprovider.AuthProviderOptions {
@@ -652,12 +654,16 @@ func (r *Runner) duplicate(result *Result) bool {
652654 return false
653655}
654656
655- func (r * Runner ) classifyPage (body string , pHash uint64 ) map [string ]interface {} {
656- kb := map [string ]any {"pHash" : pHash }
657+ func (r * Runner ) classifyPage (headlessBody , body string , pHash uint64 ) map [string ]interface {} {
658+ kb := map [string ]interface {} {"pHash" : pHash }
657659 if r .ditClassifier == nil {
658660 return kb
659661 }
660- result , err := r .ditClassifier .ExtractPageType (body )
662+ html := body
663+ if headlessBody != "" {
664+ html = headlessBody
665+ }
666+ result , err := r .ditClassifier .ExtractPageType (html )
661667 if err != nil {
662668 return kb
663669 }
@@ -2636,7 +2642,7 @@ retry:
26362642 ExtractRegex : extractRegex ,
26372643 ScreenshotBytes : screenshotBytes ,
26382644 HeadlessBody : headlessBody ,
2639- KnowledgeBase : r .classifyPage (respData , pHash ),
2645+ KnowledgeBase : r .classifyPage (headlessBody , respData , pHash ),
26402646 TechnologyDetails : technologyDetails ,
26412647 Resolvers : resolvers ,
26422648 RequestRaw : requestDump ,
0 commit comments