-
Notifications
You must be signed in to change notification settings - Fork 65
Expand file tree
/
Copy pathextractFile.js
More file actions
89 lines (77 loc) · 2.57 KB
/
extractFile.js
File metadata and controls
89 lines (77 loc) · 2.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
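// Assertion extractor: reads assertions from files in the current repo and prints them as CSV.
// Optionally sorts the rows by assertion ID and optionally appends the assertion text as the last column.
// Each extracted entry also records its index within the Cheerio result set it came from (the index is not written to the CSV).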
// Usage:
// From the root of the repository, run:
// % node testing/extractFile.js > testing/assertions.csv
const fs = require("fs");
const cheerio = require("cheerio");
// Input documents; the paths are resolved against the current working directory.
const extraAssertsFileName = "testing/inputs/extra-asserts.html";
const indexFileName = "index.html";

// Output switches.
const sortData = true; // when true, rows are sorted by assertion ID
const addAssertionTxt = true; // when true, the assertion text is appended as the last CSV column

// Read the main document from disk and hand it to Cheerio for parsing.
const dataIndex = fs.readFileSync(indexFileName, "utf8");
const indexHTML = cheerio.load(dataIndex);
/**
 * Normalize assertion text so it can be placed inside a double-quoted CSV field.
 *
 * Simple tags (letters only, no attributes) and line breaks become spaces,
 * surrounding whitespace is removed, runs of spaces/tabs collapse to a single
 * space, and every double quote is doubled.
 *
 * @param {string} text - Raw assertion text.
 * @returns {string} The cleaned, CSV-escaped text (without surrounding quotes).
 */
function cleanAssertionText(text) {
  // Markup such as <em> or </code> is replaced by a space rather than dropped,
  // so words on either side of a tag do not run together.
  const withoutTags = text.replace(/<\/?[a-zA-Z]+>/gi, " ");
  const onOneLine = withoutTags.replace(/[\r\n]/gm, " ");
  const collapsed = onOneLine.trim().replace(/[ \t]+/gm, " ");
  // CSV escapes a quote inside a quoted field by doubling it.
  return collapsed.replace(/"/gm, '""');
}
// Collect every assertion element into one array of { html, index } records.
// `html` is the Cheerio-wrapped element; `index` is the element's position in
// the result set of the selector that matched it, so it restarts at 0 for
// each selector and each document.
const arr_data = [];

/**
 * Append every element matching `selector` in the parsed document `$` to arr_data.
 *
 * @param {Function} $ - Cheerio root function returned by cheerio.load().
 * @param {string} selector - CSS selector identifying assertion elements.
 */
function collectAssertions($, selector) {
  $(selector).each((i, element) => {
    arr_data.push({
      html: $(element),
      index: i,
    });
  });
}

// Assertions in the main document, gathered one selector at a time so the
// resulting order matches running the three queries back to back.
const indexSelectors = [
  ".rfc2119-assertion",
  ".rfc2119-default-assertion",
  ".rfc2119-table-assertion",
];
for (const selector of indexSelectors) {
  collectAssertions(indexHTML, selector);
}

// Extra assertions kept in a separate file. `extraAsserts` is declared
// explicitly; assigning it undeclared would create an implicit global and
// throw in strict mode.
const data2 = fs.readFileSync(extraAssertsFileName, "utf8");
const extraAsserts = cheerio.load(data2);
collectAssertions(extraAsserts, ".rfc2119-assertion");
// When sorting is enabled, order the collected assertions by their "id" attribute.
if (sortData) {
  // Array.prototype.sort reorders arr_data in place.
  arr_data.sort((left, right) => {
    const leftId = left.html.attr("id");
    const rightId = right.html.attr("id");
    // Plain relational comparison, hence a case-sensitive ordering.
    if (rightId < leftId) {
      return 1;
    }
    if (rightId > leftId) {
      return -1;
    }
    return 0;
  });
}
// Emit the CSV header row; the "Assertion" column is present only when
// assertion text was requested.
console.log(`"ID","Status"${addAssertionTxt ? ',"Assertion"' : ""}`);
// Emit one CSV row per collected assertion. The Status column is always the
// literal string "null". The loop variable is block-scoped: the previous
// undeclared counter leaked a global and would throw in strict mode.
for (const element of arr_data) {
  const id = element.html.attr("id");
  const assertionTxt = addAssertionTxt
    ? `,"${cleanAssertionText(element.html.text().trim())}"`
    : "";
  console.log(`"${id}","null"${assertionTxt}`);
}