diff --git a/README.md b/README.md index a06ab11..9ce2fad 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ Allow projects to have self-updating up-to-date documentation available in both * "Localizing" links to relative docs if specified (useful for multi-domain websites or multi-version doc). (see [#link-localization](#link-localization)) * This allows smooth integration with static document websites like [Docusaurus](https://docusaurus.io/) or [hugo](https://gohugo.io) based themes! * Flexible pre-processing allowing easy to use GitHub experience as well as website. (see [#transform-usage](#transformation)) -* Allows profiling(using [fgprof](https://github.com/felixge/fgprof)) and exports metrics(saves to file in [OpenMetrics](https://openmetrics.io/) format) for easy debugging +* Allows profiling(using [fgprof](https://github.com/felixge/fgprof)) and exports metrics(saves to file in [OpenMetrics](https://prometheus.io/docs/specs/om/open_metrics_spec/) format) for easy debugging ## Usage diff --git a/pkg/mdformatter/linktransformer/config.go b/pkg/mdformatter/linktransformer/config.go index 8760583..dfe194c 100644 --- a/pkg/mdformatter/linktransformer/config.go +++ b/pkg/mdformatter/linktransformer/config.go @@ -22,6 +22,9 @@ type Config struct { Cache cache.Config `yaml:"cache"` + // ExplicitLocalValidators forces all links (remote and local) to go through validators. + // If false (default), only http(s) links go to validators. + // Use it for additional validation options on local links. ExplicitLocalValidators bool `yaml:"explicitLocalValidators"` Validators []ValidatorConfig `yaml:"validators"` Timeout string `yaml:"timeout"` @@ -38,14 +41,17 @@ type Config struct { type ValidatorConfig struct { // Regex for type of validator. For `githubPullsIssues` this is: (^http[s]?:\/\/)(www\.)?(github\.com\/){ORG_NAME}\/{REPO_NAME}(\/pull\/|\/issues\/). Regex string `yaml:"regex"` - // By default type is `roundtrip`. 
Could be `githubPullsIssues` or `ignore`. + // By default type is `roundtrip`. Could be `githubPullsIssues`, `ignore`, or `local`. Type ValidatorType `yaml:"type"` // GitHub repo token to avoid getting rate limited. Token string `yaml:"token"` + // Anchor for additional path to add before the local link check. + Anchor string `yaml:"anchor"` ghValidator GitHubPullsIssuesValidator rtValidator RoundTripValidator igValidator IgnoreValidator + lValidator LocalValidator } type RoundTripValidator struct { @@ -61,12 +67,17 @@ type IgnoreValidator struct { _regex *regexp.Regexp } +type LocalValidator struct { + _regex *regexp.Regexp + anchor string +} type ValidatorType string const ( roundtripValidator ValidatorType = "roundtrip" githubPullsIssuesValidator ValidatorType = "githubPullsIssues" ignoreValidator ValidatorType = "ignore" + localValidator ValidatorType = "local" ) const ( @@ -124,8 +135,12 @@ func ParseConfig(c []byte) (Config, error) { cfg.Validators[i].ghValidator._maxNum = maxNum case ignoreValidator: cfg.Validators[i].igValidator._regex = regexp.MustCompile(cfg.Validators[i].Regex) + case localValidator: + cfg.Validators[i].lValidator._regex = regexp.MustCompile(cfg.Validators[i].Regex) + cfg.Validators[i].lValidator.anchor = cfg.Validators[i].Anchor + default: - return Config{}, errors.New("Validator type not supported") + return Config{}, fmt.Errorf("validator type %v not supported", cfg.Validators[i].Type) } } return cfg, nil diff --git a/pkg/mdformatter/linktransformer/link.go b/pkg/mdformatter/linktransformer/link.go index a0536f3..e022519 100644 --- a/pkg/mdformatter/linktransformer/link.go +++ b/pkg/mdformatter/linktransformer/link.go @@ -398,28 +398,6 @@ func (v *validator) Close(ctx mdformatter.SourceContext) error { return merr.Err() } -func (v *validator) checkLocal(k futureKey) bool { - v.l.localLinksChecked.Inc() - // Check if link is email address. 
- if email := strings.TrimPrefix(k.dest, "mailto:"); email != k.dest { - if isValidEmail(email) { - return true - } - v.destFutures[k].resultFn = func() error { return fmt.Errorf("provided mailto link is not a valid email, got %v", k.dest) } - return false - } - - // Relative or absolute path. Check if exists. - newDest := absLocalLink(v.anchorDir, k.filepath, k.dest) - - // Local link. Check if exists. - if err := v.localLinks.Lookup(newDest); err != nil { - v.destFutures[k].resultFn = func() error { return fmt.Errorf("link %v, normalized to: %w", k.dest, err) } - return false - } - return true -} - func (v *validator) visit(filepath string, dest string, lineNumbers string) { v.futureMu.Lock() defer v.futureMu.Unlock() @@ -432,39 +410,19 @@ func (v *validator) visit(filepath string, dest string, lineNumbers string) { if !v.validateConfig.ExplicitLocalValidators { matches := remoteLinkPrefixRe.FindAllStringIndex(dest, 1) if matches == nil { - v.checkLocal(k) + _, _ = LocalValidator{}.IsValid(k, v) return } v.l.remoteLinksChecked.Inc() } + // TODO: Capture error? validator := v.validateConfig.GetValidatorForURL(dest) if validator != nil { - matched, err := validator.IsValid(k, v) - if matched && err == nil { - return - } + _, _ = validator.IsValid(k, v) + return } -} -// isValidEmail checks email structure and domain. -func isValidEmail(email string) bool { - // Check length. - if len(email) < 3 && len(email) > 254 { - return false - } - // Regex from https://www.w3.org/TR/2016/REC-html51-20161101/sec-forms.html#email-state-typeemail. - var emailRe = regexp.MustCompile("^[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$") - if !emailRe.MatchString(email) { - return false - } - // Check email domain. 
- domain := strings.Split(email, "@") - mx, err := net.LookupMX(domain[1]) - if err != nil || len(mx) == 0 { - return false - } - return true } type localLinksCache map[string]*[]string diff --git a/pkg/mdformatter/linktransformer/link_test.go b/pkg/mdformatter/linktransformer/link_test.go index 8e156c1..87a0290 100644 --- a/pkg/mdformatter/linktransformer/link_test.go +++ b/pkg/mdformatter/linktransformer/link_test.go @@ -167,10 +167,39 @@ func TestValidator_TransformDestination(t *testing.T) { }) t.Run("check valid local links", func(t *testing.T) { + for _, viaLocalValidator := range []bool{false, true} { + t.Run("viaLocal="+fmt.Sprint(viaLocalValidator), func(t *testing.T) { + testFile := filepath.Join(tmpDir, "repo", "docs", "test", "valid-local-links.md") + testutil.Ok(t, os.WriteFile(testFile, []byte(`# yolo + +[1](.) [2](#yolo) [3](../test/valid-local-links.md) [4](../test/valid-local-links.md#yolo) [5](../a/doc.md) +`), os.ModePerm)) + + diff, err := mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}) + testutil.Ok(t, err) + testutil.Equals(t, 0, len(diff), diff.String()) + + lt := MustNewValidator(logger, []byte(""), anchorDir, nil) + if viaLocalValidator { + lt = MustNewValidator(logger, []byte(` +version: 1 +explicitLocalValidators: true +validators: +- type: local + regex: '^.*$' +`), anchorDir, nil) + } + diff, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(lt)) + testutil.Ok(t, err) + testutil.Equals(t, 0, len(diff), diff.String()) + }) + } + }) + t.Run("check valid local links with anchor and ignore", func(t *testing.T) { testFile := filepath.Join(tmpDir, "repo", "docs", "test", "valid-local-links.md") testutil.Ok(t, os.WriteFile(testFile, []byte(`# yolo -[1](.) [2](#yolo) [3](../test/valid-local-links.md) [4](../test/valid-local-links.md#yolo) [5](../a/doc.md) +[1](.) 
[2](#yolo) [3](../test/valid-local-links.md) [4](../test/valid-local-links.md#yolo) [5](/doc.md) [6](../a/does-not-exists-on-purpose.md) `), os.ModePerm)) diff, err := mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}) @@ -178,12 +207,22 @@ func TestValidator_TransformDestination(t *testing.T) { testutil.Equals(t, 0, len(diff), diff.String()) diff, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer( - MustNewValidator(logger, []byte(""), anchorDir, nil), + MustNewValidator(logger, []byte(` +version: 1 +explicitLocalValidators: true +validators: +- type: ignore + regex: '^../a/does-not-exists-on-purpose.md$' +- type: local + regex: '^/doc.md$' + anchor: "a" +- type: local + regex: '^.*$' +`), anchorDir, nil), )) testutil.Ok(t, err) testutil.Equals(t, 0, len(diff), diff.String()) }) - t.Run("check valid local links with dash", func(t *testing.T) { testFile := filepath.Join(tmpDir, "repo", "docs", "test", "valid-local-links-with-dash.md") testutil.Ok(t, os.WriteFile(testFile, []byte(`# Expose UI on a sub-path @@ -229,32 +268,44 @@ func TestValidator_TransformDestination(t *testing.T) { }) t.Run("check invalid local links", func(t *testing.T) { - testFile := filepath.Join(tmpDir, "repo", "docs", "test", "invalid-local-links.md") - filePath := "/repo/docs/test/invalid-local-links.md" - wdir, err := os.Getwd() - testutil.Ok(t, err) - relDirPath, err := filepath.Rel(wdir, tmpDir) - testutil.Ok(t, err) - testutil.Ok(t, os.WriteFile(testFile, []byte(`# yolo + for _, viaLocalValidator := range []bool{false, true} { + t.Run("viaLocal="+fmt.Sprint(viaLocalValidator), func(t *testing.T) { + testFile := filepath.Join(tmpDir, "repo", "docs", "test", "invalid-local-links.md") + filePath := "/repo/docs/test/invalid-local-links.md" + wdir, err := os.Getwd() + testutil.Ok(t, err) + relDirPath, err := filepath.Rel(wdir, tmpDir) + testutil.Ok(t, err) + testutil.Ok(t, os.WriteFile(testFile, []byte(`# yolo [1](.) 
[2](#not-yolo) [3](../test2/invalid-local-links.md) [4](../test/invalid-local-links.md#not-yolo) [5](../test/doc.md) `), os.ModePerm)) - diff, err := mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}) - testutil.Ok(t, err) - testutil.Equals(t, 0, len(diff), diff.String()) - - _, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer( - MustNewValidator(logger, []byte(""), anchorDir, nil), - )) - testutil.NotOk(t, err) - - testutil.Equals(t, fmt.Sprintf("%v: 4 errors: "+ - "%v:3: link ../test2/invalid-local-links.md, normalized to: %v/repo/docs/test2/invalid-local-links.md: file not found; "+ - "%v:3: link ../test/invalid-local-links.md#not-yolo, normalized to: link %v/repo/docs/test/invalid-local-links.md#not-yolo, existing ids: [yolo]: file exists, but does not have such id; "+ - "%v:3: link ../test/doc.md, normalized to: %v/repo/docs/test/doc.md: file not found; "+ - "%v:3: link #not-yolo, normalized to: link %v/repo/docs/test/invalid-local-links.md#not-yolo, existing ids: [yolo]: file exists, but does not have such id", - tmpDir+filePath, relDirPath+filePath, tmpDir, relDirPath+filePath, tmpDir, relDirPath+filePath, tmpDir, relDirPath+filePath, tmpDir), err.Error()) + diff, err := mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}) + testutil.Ok(t, err) + testutil.Equals(t, 0, len(diff), diff.String()) + + lt := MustNewValidator(logger, []byte(""), anchorDir, nil) + if viaLocalValidator { + lt = MustNewValidator(logger, []byte(` +version: 1 +explicitLocalValidators: true +validators: +- type: local + regex: '^.*$' +`), anchorDir, nil) + } + _, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(lt)) + testutil.NotOk(t, err) + + testutil.Equals(t, fmt.Sprintf("%v: 4 errors: "+ + "%v:3: link ../test2/invalid-local-links.md, normalized to: %v/repo/docs/test2/invalid-local-links.md: file not found; "+ + "%v:3: link 
../test/invalid-local-links.md#not-yolo, normalized to: link %v/repo/docs/test/invalid-local-links.md#not-yolo, existing ids: [yolo]: file exists, but does not have such id; "+ + "%v:3: link ../test/doc.md, normalized to: %v/repo/docs/test/doc.md: file not found; "+ + "%v:3: link #not-yolo, normalized to: link %v/repo/docs/test/invalid-local-links.md#not-yolo, existing ids: [yolo]: file exists, but does not have such id", + tmpDir+filePath, relDirPath+filePath, tmpDir, relDirPath+filePath, tmpDir, relDirPath+filePath, tmpDir, relDirPath+filePath, tmpDir), err.Error()) + }) + } }) t.Run("check valid email link", func(t *testing.T) { diff --git a/pkg/mdformatter/linktransformer/validator.go b/pkg/mdformatter/linktransformer/validator.go index 9787575..766f39d 100644 --- a/pkg/mdformatter/linktransformer/validator.go +++ b/pkg/mdformatter/linktransformer/validator.go @@ -5,6 +5,9 @@ package linktransformer import ( "fmt" + "net" + "path/filepath" + "regexp" "strconv" "strings" ) @@ -13,6 +16,52 @@ type Validator interface { IsValid(k futureKey, r *validator) (bool, error) } +func (v LocalValidator) IsValid(k futureKey, r *validator) (bool, error) { + r.l.localLinksChecked.Inc() + // Check if link is email address. + if email := strings.TrimPrefix(k.dest, "mailto:"); email != k.dest { + if isValidEmail(email) { + return true, nil + } + r.destFutures[k].resultFn = func() error { return fmt.Errorf("provided mailto link is not a valid email, got %v", k.dest) } + return false, nil + } + + anchorDir := r.anchorDir + if v.anchor != "" { + anchorDir = filepath.Join(anchorDir, v.anchor) + } + // Relative or absolute path. Check if exists. + newDest := absLocalLink(anchorDir, k.filepath, k.dest) + + // Local link. Check if exists. 
+ if err := r.localLinks.Lookup(newDest); err != nil { + r.destFutures[k].resultFn = func() error { return fmt.Errorf("link %v, normalized to: %w", k.dest, err) } + return false, nil + } + return true, nil +} + +// isValidEmail checks email length, structure, and that its domain has MX records. +func isValidEmail(email string) bool { + // Check length: reject addresses shorter than 3 or longer than 254 characters. + if len(email) < 3 || len(email) > 254 { + return false + } + // Regex from https://www.w3.org/TR/2016/REC-html51-20161101/sec-forms.html#email-state-typeemail. + var emailRe = regexp.MustCompile("^[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$") + if !emailRe.MatchString(email) { + return false + } + // Check email domain. + domain := strings.Split(email, "@") + mx, err := net.LookupMX(domain[1]) + if err != nil || len(mx) == 0 { + return false + } + return true +} + // GitHubPullsIssuesValidator.IsValid skips visiting all GitHub issue/PR links. func (v GitHubPullsIssuesValidator) IsValid(k futureKey, r *validator) (bool, error) { r.l.githubSkippedLinks.Inc() @@ -36,7 +85,7 @@ func (v RoundTripValidator) IsValid(k futureKey, r *validator) (bool, error) { matches := remoteLinkPrefixRe.FindAllStringIndex(k.dest, 1) if matches == nil && r.validateConfig.ExplicitLocalValidators { r.l.localLinksChecked.Inc() - return r.checkLocal(k), nil + return LocalValidator{}.IsValid(k, r) } // Result will be in future. @@ -72,7 +121,7 @@ func (v RoundTripValidator) IsValid(k futureKey, r *validator) (bool, error) { return true, nil } -// IgnoreValidator.IsValid returns true if matched so that link in not checked. +// IsValid returns true if matched so that link is not checked. 
func (v IgnoreValidator) IsValid(k futureKey, r *validator) (bool, error) { r.l.ignoreSkippedLinks.Inc() @@ -98,6 +147,11 @@ func (v Config) GetValidatorForURL(URL string) Validator { continue } return val.igValidator + case localValidator: + if !val.lValidator._regex.MatchString(URL) { + continue + } + return val.lValidator default: panic("unexpected validator type") }