33const path = require ( 'path' )
44const fs = require ( 'fs' )
55const linkinator = require ( 'linkinator' )
6- const dedent = require ( 'dedent' )
76const program = require ( 'commander' )
8- const { escapeRegExp } = require ( 'lodash' )
7+ const { pull , uniq } = require ( 'lodash' )
98const checker = new linkinator . LinkChecker ( )
109const rimraf = require ( 'rimraf' ) . sync
10+ const mkdirp = require ( 'mkdirp' ) . sync
1111const root = 'https://docs.github.com'
1212const englishRoot = `${ root } /en`
1313const { deprecated } = require ( '../lib/enterprise-server-releases' )
14+ const got = require ( 'got' )
15+
16+ // Links with these codes may or may not really be broken.
17+ const retryStatusCodes = [ 429 , 503 ]
1418
// [start-readme]
//
// This script runs once per day via a scheduled GitHub Action to check all links in
// English content, not including deprecated Enterprise Server content. It opens an issue
// if it finds broken links. To exclude a link path, add it to `lib/excluded-links.js`.
//
// [end-readme]
2226
// Declare the command-line interface, then parse the arguments that were passed in.
program.description('Check all links in the English docs.')
program.option('-d, --dry-run', 'Turn off recursion to get a fast minimal report (useful for previewing output).')
program.option('-p, --path <PATH>', 'Provide an optional path to check. Best used with --dry-run. If not provided, defaults to the homepage.')
program.parse(process.argv)

// Links listed in a separate file are skipped during the scan.
const excludedLinks = require('../lib/excluded-links')
3135
3236// Skip non-English content.
3337const languagesToSkip = Object . keys ( require ( '../lib/languages' ) )
@@ -40,7 +44,7 @@ const languagesToSkip = Object.keys(require('../lib/languages'))
4044const enterpriseReleasesToSkip = new RegExp ( `${ root } .+?[/@](${ deprecated . join ( '|' ) } )/` )
4145
4246const config = {
43- path : englishRoot ,
47+ path : program . path || englishRoot ,
4448 concurrency : 300 ,
4549 // If this is a dry run, turn off recursion.
4650 recurse : ! program . dryRun ,
@@ -56,40 +60,74 @@ const config = {
5660main ( )
5761
/**
 * Scan the site for broken links, re-test the ones whose result is
 * inconclusive, then print a report and exit.
 *
 * Every checked link is appended to `.linkinator/full.log` as one JSON
 * document per line. The process exits with code 0 when no broken links
 * remain and with code 1 otherwise.
 */
async function main () {
  // Start each run with a fresh, empty log directory.
  const logFile = path.join(__dirname, '../.linkinator/full.log')
  const logDir = path.dirname(logFile)
  rimraf(logDir)
  mkdirp(logDir)

  // Record each link result in the log file as soon as it is reported.
  checker.on('link', linkResult => {
    fs.appendFileSync(logFile, JSON.stringify(linkResult) + '\n')
  })

  // Run the scan and keep only the links reported as broken.
  const { links } = await checker.check(config)
  const brokenLinks = links.filter(({ state }) => state === 'BROKEN')

  // A missing status, or one listed in retryStatusCodes, does not prove the
  // link is broken, so those links get a second, individual request.
  const isInconclusive = ({ status }) => !status || retryStatusCodes.includes(status)

  const recheck = async link => {
    try {
      // got throws an HTTPError if the response code is not 2xx or 3xx.
      await got(link.url)
    } catch (err) {
      // The retry failed as well: the link stays in the broken list.
      return
    }
    // The retry succeeded, so the link is not broken after all.
    pull(brokenLinks, link)
  }

  await Promise.all(brokenLinks.filter(isInconclusive).map(recheck))

  // Report what is left and exit unsuccessfully if anything is still broken.
  if (brokenLinks.length > 0) {
    console.log(`${brokenLinks.length} broken links found on docs.github.com\n`)
    displayBrokenLinks(brokenLinks)
    process.exit(1)
  }

  // Nothing left to report: exit successfully.
  console.log('All links are good!')
  process.exit(0)
}
110+
/**
 * Print broken links to stdout, grouped by status code.
 *
 * Each group is written as a `## Status <code>: Found <n> broken links`
 * heading followed by a fenced block that contains every link of the group
 * as pretty-printed JSON. Groups appear in the order in which their status
 * code is first seen in the input.
 *
 * @param {Array<Object>} brokenLinks - Link results to display; only their
 *   `status` property is inspected. The objects are not modified.
 * @returns {void}
 */
function displayBrokenLinks (brokenLinks) {
  // A link without a usable status code is shown with the status `Invalid`.
  // The replacement is made on a shallow copy so that the caller's link
  // objects keep their original status.
  const displayLinks = brokenLinks.map(link =>
    link.status ? link : Object.assign({}, link, { status: 'Invalid' })
  )

  // Distinct status codes, in order of first appearance.
  const allStatusCodes = uniq(displayLinks.map(link => link.status))

  allStatusCodes.forEach(statusCode => {
    const brokenLinksForStatus = displayLinks.filter(link => link.status === statusCode)

    console.log(`## Status ${statusCode}: Found ${brokenLinksForStatus.length} broken links`)
    console.log('```')
    brokenLinksForStatus.forEach(brokenLinkObj => {
      console.log(JSON.stringify(brokenLinkObj, null, 2))
    })
    console.log('```')
  })
}
0 commit comments