Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions crates/oxc_codegen/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ oxc_allocator = { workspace = true }
oxc_syntax = { workspace = true, features = ["to_js_string"] }
oxc_sourcemap = { workspace = true }
oxc_mangler = { workspace = true }
oxc_index = { workspace = true }

bitflags = { workspace = true }
nonmax = { workspace = true }
once_cell = { workspace = true }
daachorse = { workspace = true }
rustc-hash = { workspace = true }
Expand Down
108 changes: 82 additions & 26 deletions crates/oxc_codegen/src/sourcemap_builder.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
use std::sync::Arc;

use nonmax::NonMaxU32;

use oxc_index::{Idx, IndexVec};
use oxc_span::Span;
use oxc_syntax::identifier::{LS, PS};

Expand All @@ -9,16 +12,54 @@ const LS_OR_PS_SECOND: u8 = 0x80;
const LS_THIRD: u8 = 0xA8;
const PS_THIRD: u8 = 0xA9;

/// Line offset table
/// Index into vec of `ColumnOffsets`
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct ColumnOffsetsId(NonMaxU32);

impl Idx for ColumnOffsetsId {
    /// Create a `ColumnOffsetsId` from a `usize` index.
    ///
    /// # Panics
    /// Panics if `idx >= u32::MAX` (i.e. if it cannot be represented by a `NonMaxU32`).
    fn from_usize(idx: usize) -> Self {
        // `u32::try_from` rejects values above `u32::MAX`, and `NonMaxU32::new`
        // rejects `u32::MAX` itself — together they enforce the same bound as the
        // previous `assert!` + `new_unchecked`, with no `unsafe` and no lossy cast.
        let id = u32::try_from(idx).ok().and_then(NonMaxU32::new);
        Self(id.expect("index out of range for `NonMaxU32`"))
    }

    /// Get the index back as a `usize`.
    fn index(self) -> usize {
        self.0.get() as usize
    }
}

/// Line offset tables.
///
/// Used for tracking lines and columns from byte offsets via binary search.
///
/// Code is adapted from [esbuild](https://github.com/evanw/esbuild/blob/cc74e6042a9f573bf58e1e3f165ebda70af4ad3b/internal/js_printer/js_printer.go#L4806-L4808)
///
/// Most lines of source code will not contain Unicode chars, so optimize storage for this common case.
///
/// Each line is represented by a `Line`.
/// Where a line is entirely ASCII, translating byte offset to UTF-16 column is simple,
/// given the byte offset of start of line. A column lookup table isn't needed for that line.
/// In this case, `Line::column_offsets_id` is `None`.
/// For rare lines which do contain Unicode chars, we store column offsets in a `ColumnOffsets` which
/// is stored in a separate `IndexVec`. `Line::column_offsets_id` contains index for that line's `ColumnOffsets`.
/// Storing column offset info which is rarely used in a separate structure keeps `Line` as small as possible.
#[derive(Debug, Default)]
pub struct LineOffsetTables {
// One entry per line of the original source, in order,
// recording the byte offset at which the line starts.
lines: Vec<Line>,
// Column lookup tables — allocated only for the (rare) lines that contain
// non-ASCII chars. Looked up via `Line::column_offsets_id`.
column_offsets: IndexVec<ColumnOffsetsId, ColumnOffsets>,
}

#[derive(Debug)]
pub struct LineOffsetTable {
columns: Option<Vec<u32>>,
byte_offset_to_first: u32,
pub struct Line {
byte_offset_to_start_of_line: u32,
column_offsets_id: Option<ColumnOffsetsId>,
}

#[derive(Debug)]
pub struct ColumnOffsets {
// Byte offset, relative to the start of the line, of the first non-ASCII char.
// Byte offsets before this point translate 1:1 to UTF-16 columns, so the
// `columns` table only needs to cover offsets from here onwards.
byte_offset_to_first: u32,
// UTF-16 column for each byte offset of the line from `byte_offset_to_first` on.
columns: Box<[u32]>,
}

#[allow(clippy::struct_field_names)]
Expand All @@ -27,7 +68,7 @@ pub struct SourcemapBuilder {
original_source: Arc<str>,
last_generated_update: usize,
last_position: Option<u32>,
line_offset_tables: Vec<LineOffsetTable>,
line_offset_tables: LineOffsetTables,
sourcemap_builder: oxc_sourcemap::SourceMapBuilder,
generated_line: u32,
generated_column: u32,
Expand All @@ -40,7 +81,7 @@ impl Default for SourcemapBuilder {
original_source: "".into(),
last_generated_update: 0,
last_position: None,
line_offset_tables: vec![],
line_offset_tables: LineOffsetTables::default(),
sourcemap_builder: oxc_sourcemap::SourceMapBuilder::default(),
generated_line: 0,
generated_column: 0,
Expand Down Expand Up @@ -97,17 +138,19 @@ impl SourcemapBuilder {
fn search_original_line_and_column(&mut self, position: u32) -> (u32, u32) {
let result = self
.line_offset_tables
.partition_point(|table| table.byte_offset_to_start_of_line <= position)
as u32;
.lines
.partition_point(|table| table.byte_offset_to_start_of_line <= position);
let original_line = if result > 0 { result - 1 } else { 0 };
let line = &self.line_offset_tables[original_line as usize];
let line = &self.line_offset_tables.lines[original_line];
let mut original_column = position - line.byte_offset_to_start_of_line;
if original_column >= line.byte_offset_to_first {
if let Some(cols) = &line.columns {
original_column = cols[(original_column - line.byte_offset_to_first) as usize];
if let Some(column_offsets_id) = line.column_offsets_id {
let column_offsets = &self.line_offset_tables.column_offsets[column_offsets_id];
if original_column >= column_offsets.byte_offset_to_first {
original_column = column_offsets.columns
[(original_column - column_offsets.byte_offset_to_first) as usize];
}
}
(original_line, original_column)
(original_line as u32, original_column)
}

#[allow(clippy::cast_possible_truncation)]
Expand Down Expand Up @@ -170,8 +213,9 @@ impl SourcemapBuilder {
self.last_generated_update = output.len();
}

fn generate_line_offset_tables(content: &str) -> Vec<LineOffsetTable> {
let mut tables = vec![];
fn generate_line_offset_tables(content: &str) -> LineOffsetTables {
let mut lines = vec![];
let mut column_offsets = IndexVec::new();

// Process content line-by-line.
// For each line, start by assuming line will be entirely ASCII, and read byte-by-byte.
Expand All @@ -181,12 +225,9 @@ impl SourcemapBuilder {
// At end of line, go back to top of outer loop, and again assume ASCII for next line.
let mut line_byte_offset = 0;
'lines: loop {
tables.push(LineOffsetTable {
columns: None,
// `usize::MAX` so `original_column >= line.byte_offset_to_first` check in
// `search_original_line_and_column` fails if line is all ASCII
byte_offset_to_first: u32::MAX,
lines.push(Line {
byte_offset_to_start_of_line: line_byte_offset,
column_offsets_id: None,
});

let remaining = &content.as_bytes()[line_byte_offset as usize..];
Expand All @@ -209,11 +250,12 @@ impl SourcemapBuilder {
}
_ => {
// Unicode char found.
// Create `columns` Vec, and set `byte_offset_to_first`.
let table = tables.iter_mut().last().unwrap();
table.byte_offset_to_first = byte_offset_from_line_start;
table.columns = Some(vec![]);
let columns = table.columns.as_mut().unwrap();
// Set `column_offsets_id` for line and create `columns` Vec.
let line = lines.iter_mut().last().unwrap();
line.column_offsets_id =
Some(ColumnOffsetsId::from_usize(column_offsets.len()));

let mut columns = vec![];

// Loop through rest of line char-by-char.
// `chunk_byte_offset` in this loop is byte offset from start of this 1st
Expand Down Expand Up @@ -256,13 +298,27 @@ impl SourcemapBuilder {
// Line break found.
// `chunk_byte_offset` is now the offset of *end* of the line break.
line_byte_offset += chunk_byte_offset;

// Record column offsets
column_offsets.push(ColumnOffsets {
byte_offset_to_first: byte_offset_from_line_start,
columns: columns.into_boxed_slice(),
});

// Revert back to outer loop for next line
continue 'lines;
}

// EOF.
// One last column entry for EOF position.
columns.push(column);

// Record column offsets
column_offsets.push(ColumnOffsets {
byte_offset_to_first: byte_offset_from_line_start,
columns: columns.into_boxed_slice(),
});

break 'lines;
}
};
Expand All @@ -277,7 +333,7 @@ impl SourcemapBuilder {
break;
}

tables
LineOffsetTables { lines, column_offsets }
}
}

Expand Down