Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Allow projects to have self-updating up-to-date documentation available in both
* "Localizing" links to relative docs if specified (useful for multi-domain websites or multi-version doc). (see [#link-localization](#link-localization))
* This allows smooth integration with static document websites like [Docusaurus](https://docusaurus.io/) or [hugo](https://gohugo.io) based themes!
* Flexible pre-processing allowing easy to use GitHub experience as well as website. (see [#transform-usage](#transformation))
* Allows profiling(using [fgprof](https://github.com/felixge/fgprof)) and exports metrics(saves to file in [OpenMetrics](https://openmetrics.io/) format) for easy debugging
* Allows profiling (using [fgprof](https://github.com/felixge/fgprof)) and exports metrics (saved to a file in [OpenMetrics](https://prometheus.io/docs/specs/om/open_metrics_spec/) format) for easy debugging.

## Usage

Expand Down
19 changes: 17 additions & 2 deletions pkg/mdformatter/linktransformer/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ type Config struct {

Cache cache.Config `yaml:"cache"`

// ExplicitLocalValidators forces all links (remote and local) to go through validators.
// If false (default), only http(s) links go to validators.
// Use it to apply additional validation options to local links.
ExplicitLocalValidators bool `yaml:"explicitLocalValidators"`
Validators []ValidatorConfig `yaml:"validators"`
Timeout string `yaml:"timeout"`
Expand All @@ -38,14 +41,17 @@ type Config struct {
// ValidatorConfig describes one entry of the `validators` list: which links it
// applies to (Regex), how they are validated (Type), and type-specific options.
type ValidatorConfig struct {
	// Regex for type of validator. For `githubPullsIssues` this is: (^http[s]?:\/\/)(www\.)?(github\.com\/){ORG_NAME}\/{REPO_NAME}(\/pull\/|\/issues\/).
	Regex string `yaml:"regex"`
	// By default type is `roundtrip`. Could be `githubPullsIssues`, `ignore`, or `local`.
	Type ValidatorType `yaml:"type"`
	// GitHub repo token to avoid getting rate limited.
	Token string `yaml:"token"`
	// Anchor is an additional path that is joined onto the anchor directory
	// before the local link check (used by the `local` validator type).
	Anchor string `yaml:"anchor"`

	// Per-type validator state. ParseConfig fills in the field matching Type
	// (e.g. the compiled regex for `ignore` and `local`); these are not read from YAML.
	ghValidator GitHubPullsIssuesValidator
	rtValidator RoundTripValidator
	igValidator IgnoreValidator
	lValidator LocalValidator
}

type RoundTripValidator struct {
Expand All @@ -61,12 +67,17 @@ type IgnoreValidator struct {
_regex *regexp.Regexp
}

// LocalValidator validates local links (and mailto: links) instead of doing a
// remote round trip.
type LocalValidator struct {
	// _regex selects the links handled by this validator; ParseConfig compiles
	// it from ValidatorConfig.Regex.
	_regex *regexp.Regexp
	// anchor is an optional path joined onto the validator's anchor directory
	// before a local link is resolved; copied from ValidatorConfig.Anchor.
	anchor string
}
// ValidatorType names the kind of validator a ValidatorConfig entry selects.
type ValidatorType string

const (
	// roundtripValidator is the default type according to the ValidatorConfig.Type documentation.
	roundtripValidator ValidatorType = "roundtrip"
	// githubPullsIssuesValidator skips visiting GitHub issue/PR links.
	githubPullsIssuesValidator ValidatorType = "githubPullsIssues"
	// ignoreValidator leaves matched links unchecked.
	ignoreValidator ValidatorType = "ignore"
	// localValidator routes matched links through LocalValidator.
	localValidator ValidatorType = "local"
)

const (
Expand Down Expand Up @@ -124,8 +135,12 @@ func ParseConfig(c []byte) (Config, error) {
cfg.Validators[i].ghValidator._maxNum = maxNum
case ignoreValidator:
cfg.Validators[i].igValidator._regex = regexp.MustCompile(cfg.Validators[i].Regex)
case localValidator:
cfg.Validators[i].lValidator._regex = regexp.MustCompile(cfg.Validators[i].Regex)
cfg.Validators[i].lValidator.anchor = cfg.Validators[i].Anchor

default:
return Config{}, errors.New("Validator type not supported")
return Config{}, fmt.Errorf("validator type %v not supported", cfg.Validators[i].Type)
}
}
return cfg, nil
Expand Down
50 changes: 4 additions & 46 deletions pkg/mdformatter/linktransformer/link.go
Original file line number Diff line number Diff line change
Expand Up @@ -398,28 +398,6 @@ func (v *validator) Close(ctx mdformatter.SourceContext) error {
return merr.Err()
}

// checkLocal validates a non-remote link and reports whether it is valid.
//
// It increments the local-links counter, then handles two cases:
//   - "mailto:" destinations are checked with isValidEmail;
//   - anything else is resolved against v.anchorDir and looked up in v.localLinks.
//
// On failure the reason is stored as the resultFn of the future registered for k.
func (v *validator) checkLocal(k futureKey) bool {
	v.l.localLinksChecked.Inc()

	const mailtoPrefix = "mailto:"
	if strings.HasPrefix(k.dest, mailtoPrefix) {
		if !isValidEmail(k.dest[len(mailtoPrefix):]) {
			v.destFutures[k].resultFn = func() error {
				return fmt.Errorf("provided mailto link is not a valid email, got %v", k.dest)
			}
			return false
		}
		return true
	}

	// Relative or absolute path: normalize it, then make sure the target exists.
	target := absLocalLink(v.anchorDir, k.filepath, k.dest)
	lookupErr := v.localLinks.Lookup(target)
	if lookupErr == nil {
		return true
	}
	v.destFutures[k].resultFn = func() error {
		return fmt.Errorf("link %v, normalized to: %w", k.dest, lookupErr)
	}
	return false
}

func (v *validator) visit(filepath string, dest string, lineNumbers string) {
v.futureMu.Lock()
defer v.futureMu.Unlock()
Expand All @@ -432,39 +410,19 @@ func (v *validator) visit(filepath string, dest string, lineNumbers string) {
if !v.validateConfig.ExplicitLocalValidators {
matches := remoteLinkPrefixRe.FindAllStringIndex(dest, 1)
if matches == nil {
v.checkLocal(k)
_, _ = LocalValidator{}.IsValid(k, v)
return
}
v.l.remoteLinksChecked.Inc()
}

// TODO: Capture error?
validator := v.validateConfig.GetValidatorForURL(dest)
if validator != nil {
matched, err := validator.IsValid(k, v)
if matched && err == nil {
return
}
_, _ = validator.IsValid(k, v)
return
}
}

// isValidEmail checks email structure and domain.
//
// It returns true only if the address has a plausible length, matches the
// HTML5 e-mail syntax, and its domain has at least one MX record. The MX check
// performs a DNS lookup, so any resolution failure is reported as invalid.
func isValidEmail(email string) bool {
	// Check length. The two bounds must be combined with ||: a length can never
	// be both < 3 and > 254, so the previous && made this guard dead code.
	if len(email) < 3 || len(email) > 254 {
		return false
	}
	// Regex from https://www.w3.org/TR/2016/REC-html51-20161101/sec-forms.html#email-state-typeemail.
	var emailRe = regexp.MustCompile("^[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$")
	if !emailRe.MatchString(email) {
		return false
	}
	// Check email domain. The regex above guarantees exactly one '@', so index 1 exists.
	domain := strings.Split(email, "@")
	mx, err := net.LookupMX(domain[1])
	if err != nil || len(mx) == 0 {
		return false
	}
	return true
}

// localLinksCache maps a string key to a pointer to a slice of strings.
// NOTE(review): presumably keyed by absolute file path, with the IDs (anchors)
// found in that file as the value — confirm against Lookup.
type localLinksCache map[string]*[]string
Expand Down
101 changes: 76 additions & 25 deletions pkg/mdformatter/linktransformer/link_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,23 +167,62 @@ func TestValidator_TransformDestination(t *testing.T) {
})

t.Run("check valid local links", func(t *testing.T) {
for _, viaLocalValidator := range []bool{false, true} {
t.Run("viaLocal="+fmt.Sprint(viaLocalValidator), func(t *testing.T) {
testFile := filepath.Join(tmpDir, "repo", "docs", "test", "valid-local-links.md")
testutil.Ok(t, os.WriteFile(testFile, []byte(`# yolo

[1](.) [2](#yolo) [3](../test/valid-local-links.md) [4](../test/valid-local-links.md#yolo) [5](../a/doc.md)
`), os.ModePerm))

diff, err := mdformatter.IsFormatted(context.TODO(), logger, []string{testFile})
testutil.Ok(t, err)
testutil.Equals(t, 0, len(diff), diff.String())

lt := MustNewValidator(logger, []byte(""), anchorDir, nil)
if viaLocalValidator {
lt = MustNewValidator(logger, []byte(`
version: 1
explicitLocalValidators: true
validators:
- type: local
regex: '^.*$'
`), anchorDir, nil)
}
diff, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(lt))
testutil.Ok(t, err)
testutil.Equals(t, 0, len(diff), diff.String())
})
}
})
t.Run("check valid local links with anchor and ignore", func(t *testing.T) {
testFile := filepath.Join(tmpDir, "repo", "docs", "test", "valid-local-links.md")
testutil.Ok(t, os.WriteFile(testFile, []byte(`# yolo

[1](.) [2](#yolo) [3](../test/valid-local-links.md) [4](../test/valid-local-links.md#yolo) [5](../a/doc.md)
[1](.) [2](#yolo) [3](../test/valid-local-links.md) [4](../test/valid-local-links.md#yolo) [5](/doc.md) [6](../a/does-not-exists-on-purpose.md)
`), os.ModePerm))

diff, err := mdformatter.IsFormatted(context.TODO(), logger, []string{testFile})
testutil.Ok(t, err)
testutil.Equals(t, 0, len(diff), diff.String())

diff, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(
MustNewValidator(logger, []byte(""), anchorDir, nil),
MustNewValidator(logger, []byte(`
version: 1
explicitLocalValidators: true
validators:
- type: ignore
regex: '^../a/does-not-exists-on-purpose.md$'
- type: local
regex: '^/doc.md$'
anchor: "a"
- type: local
regex: '^.*$'
`), anchorDir, nil),
))
testutil.Ok(t, err)
testutil.Equals(t, 0, len(diff), diff.String())
})

t.Run("check valid local links with dash", func(t *testing.T) {
testFile := filepath.Join(tmpDir, "repo", "docs", "test", "valid-local-links-with-dash.md")
testutil.Ok(t, os.WriteFile(testFile, []byte(`# Expose UI on a sub-path
Expand Down Expand Up @@ -229,32 +268,44 @@ func TestValidator_TransformDestination(t *testing.T) {
})

t.Run("check invalid local links", func(t *testing.T) {
testFile := filepath.Join(tmpDir, "repo", "docs", "test", "invalid-local-links.md")
filePath := "/repo/docs/test/invalid-local-links.md"
wdir, err := os.Getwd()
testutil.Ok(t, err)
relDirPath, err := filepath.Rel(wdir, tmpDir)
testutil.Ok(t, err)
testutil.Ok(t, os.WriteFile(testFile, []byte(`# yolo
for _, viaLocalValidator := range []bool{false, true} {
t.Run("viaLocal="+fmt.Sprint(viaLocalValidator), func(t *testing.T) {
testFile := filepath.Join(tmpDir, "repo", "docs", "test", "invalid-local-links.md")
filePath := "/repo/docs/test/invalid-local-links.md"
wdir, err := os.Getwd()
testutil.Ok(t, err)
relDirPath, err := filepath.Rel(wdir, tmpDir)
testutil.Ok(t, err)
testutil.Ok(t, os.WriteFile(testFile, []byte(`# yolo

[1](.) [2](#not-yolo) [3](../test2/invalid-local-links.md) [4](../test/invalid-local-links.md#not-yolo) [5](../test/doc.md)
`), os.ModePerm))

diff, err := mdformatter.IsFormatted(context.TODO(), logger, []string{testFile})
testutil.Ok(t, err)
testutil.Equals(t, 0, len(diff), diff.String())

_, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(
MustNewValidator(logger, []byte(""), anchorDir, nil),
))
testutil.NotOk(t, err)

testutil.Equals(t, fmt.Sprintf("%v: 4 errors: "+
"%v:3: link ../test2/invalid-local-links.md, normalized to: %v/repo/docs/test2/invalid-local-links.md: file not found; "+
"%v:3: link ../test/invalid-local-links.md#not-yolo, normalized to: link %v/repo/docs/test/invalid-local-links.md#not-yolo, existing ids: [yolo]: file exists, but does not have such id; "+
"%v:3: link ../test/doc.md, normalized to: %v/repo/docs/test/doc.md: file not found; "+
"%v:3: link #not-yolo, normalized to: link %v/repo/docs/test/invalid-local-links.md#not-yolo, existing ids: [yolo]: file exists, but does not have such id",
tmpDir+filePath, relDirPath+filePath, tmpDir, relDirPath+filePath, tmpDir, relDirPath+filePath, tmpDir, relDirPath+filePath, tmpDir), err.Error())
diff, err := mdformatter.IsFormatted(context.TODO(), logger, []string{testFile})
testutil.Ok(t, err)
testutil.Equals(t, 0, len(diff), diff.String())

lt := MustNewValidator(logger, []byte(""), anchorDir, nil)
if viaLocalValidator {
lt = MustNewValidator(logger, []byte(`
version: 1
explicitLocalValidators: true
validators:
- type: local
regex: '^.*$'
`), anchorDir, nil)
}
_, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(lt))
testutil.NotOk(t, err)

testutil.Equals(t, fmt.Sprintf("%v: 4 errors: "+
"%v:3: link ../test2/invalid-local-links.md, normalized to: %v/repo/docs/test2/invalid-local-links.md: file not found; "+
"%v:3: link ../test/invalid-local-links.md#not-yolo, normalized to: link %v/repo/docs/test/invalid-local-links.md#not-yolo, existing ids: [yolo]: file exists, but does not have such id; "+
"%v:3: link ../test/doc.md, normalized to: %v/repo/docs/test/doc.md: file not found; "+
"%v:3: link #not-yolo, normalized to: link %v/repo/docs/test/invalid-local-links.md#not-yolo, existing ids: [yolo]: file exists, but does not have such id",
tmpDir+filePath, relDirPath+filePath, tmpDir, relDirPath+filePath, tmpDir, relDirPath+filePath, tmpDir, relDirPath+filePath, tmpDir), err.Error())
})
}
})

t.Run("check valid email link", func(t *testing.T) {
Expand Down
58 changes: 56 additions & 2 deletions pkg/mdformatter/linktransformer/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ package linktransformer

import (
"fmt"
"net"
"path/filepath"
"regexp"
"strconv"
"strings"
)
Expand All @@ -13,6 +16,52 @@ type Validator interface {
IsValid(k futureKey, r *validator) (bool, error)
}

// IsValid checks a local link (or mailto: link) for the future identified by k.
//
// It always increments r's local-links counter. A "mailto:" destination is
// checked with isValidEmail; any other destination is resolved relative to
// r.anchorDir (extended by v.anchor when set) and looked up in r.localLinks.
// The boolean reports whether the link is valid; the failure reason is stored
// as the future's resultFn and the returned error is always nil.
//
// NOTE(review): RoundTripValidator.IsValid increments localLinksChecked before
// delegating here, so that path appears to count a link twice — confirm.
func (v LocalValidator) IsValid(k futureKey, r *validator) (bool, error) {
	r.l.localLinksChecked.Inc()

	const mailtoPrefix = "mailto:"
	if strings.HasPrefix(k.dest, mailtoPrefix) {
		if !isValidEmail(k.dest[len(mailtoPrefix):]) {
			r.destFutures[k].resultFn = func() error {
				return fmt.Errorf("provided mailto link is not a valid email, got %v", k.dest)
			}
			return false, nil
		}
		return true, nil
	}

	// An optional per-validator anchor is appended to the base directory.
	base := r.anchorDir
	if v.anchor != "" {
		base = filepath.Join(base, v.anchor)
	}

	// Relative or absolute path: normalize it, then make sure the target exists.
	target := absLocalLink(base, k.filepath, k.dest)
	lookupErr := r.localLinks.Lookup(target)
	if lookupErr == nil {
		return true, nil
	}
	r.destFutures[k].resultFn = func() error {
		return fmt.Errorf("link %v, normalized to: %w", k.dest, lookupErr)
	}
	return false, nil
}

// validEmailRe matches the HTML5 e-mail syntax.
// Regex from https://www.w3.org/TR/2016/REC-html51-20161101/sec-forms.html#email-state-typeemail.
// Compiled once at package scope rather than on every isValidEmail call.
var validEmailRe = regexp.MustCompile("^[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$")

// isValidEmail checks email structure and domain.
//
// It returns true only if the address has a plausible length, matches the
// HTML5 e-mail syntax, and its domain has at least one MX record. The MX check
// performs a DNS lookup, so any resolution failure is reported as invalid.
func isValidEmail(email string) bool {
	// Check length. The two bounds must be combined with ||: a length can never
	// be both < 3 and > 254, so the previous && made this guard dead code.
	if len(email) < 3 || len(email) > 254 {
		return false
	}
	if !validEmailRe.MatchString(email) {
		return false
	}
	// Check email domain. The regex guarantees exactly one '@', so everything
	// after it is the domain.
	domain := email[strings.LastIndexByte(email, '@')+1:]
	mx, err := net.LookupMX(domain)
	if err != nil || len(mx) == 0 {
		return false
	}
	return true
}

// GitHubPullsIssuesValidator.IsValid skips visiting all GitHub issue/PR links.
func (v GitHubPullsIssuesValidator) IsValid(k futureKey, r *validator) (bool, error) {
r.l.githubSkippedLinks.Inc()
Expand All @@ -36,7 +85,7 @@ func (v RoundTripValidator) IsValid(k futureKey, r *validator) (bool, error) {
matches := remoteLinkPrefixRe.FindAllStringIndex(k.dest, 1)
if matches == nil && r.validateConfig.ExplicitLocalValidators {
r.l.localLinksChecked.Inc()
return r.checkLocal(k), nil
return LocalValidator{}.IsValid(k, r)
}

// Result will be in future.
Expand Down Expand Up @@ -72,7 +121,7 @@ func (v RoundTripValidator) IsValid(k futureKey, r *validator) (bool, error) {
return true, nil
}

// IgnoreValidator.IsValid returns true if matched so that link in not checked.
// IsValid returns true if matched so that link is not checked.
func (v IgnoreValidator) IsValid(k futureKey, r *validator) (bool, error) {
r.l.ignoreSkippedLinks.Inc()

Expand All @@ -98,6 +147,11 @@ func (v Config) GetValidatorForURL(URL string) Validator {
continue
}
return val.igValidator
case localValidator:
if !val.lValidator._regex.MatchString(URL) {
continue
}
return val.lValidator
default:
panic("unexpected validator type")
}
Expand Down
Loading