Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions lib/floki.ex
Original file line number Diff line number Diff line change
Expand Up @@ -790,4 +790,19 @@ defmodule Floki do
# Thin wrapper: the actual filtering is performed by FilterOut.filter_out/2.
def filter_out(elements, selector), do: FilterOut.filter_out(elements, selector)

@doc """
Returns `value` escaped so that it can be used as a CSS identifier.

The work is delegated to `Floki.CSSEscape.escape/1`.

## Examples

    iex> Floki.css_escape("hello world")
    "hello\\\\ world"

    iex> Floki.css_escape("-123")
    "-\\\\31 23"

"""
@spec css_escape(String.t()) :: String.t()
defdelegate css_escape(value), to: Floki.CSSEscape, as: :escape
end
77 changes: 77 additions & 0 deletions lib/floki/css_escape.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
defmodule Floki.CSSEscape do
  @moduledoc false

  # Elixir port of
  # https://github.com/mathiasbynens/CSS.escape/blob/master/css.escape.js

  @doc """
  Returns `value` escaped so that it can be used as a CSS identifier.

  Raises `ArgumentError` when `value` is not a binary.

  ## Examples

      iex> Floki.CSSEscape.escape("hello world")
      "hello\\\\ world"

      iex> Floki.CSSEscape.escape("-123")
      "-\\\\31 23"

  """
  @spec escape(String.t()) :: String.t()
  def escape(value) when is_binary(value) do
    escaped = value |> String.to_charlist() |> escape_chars()
    IO.iodata_to_binary(escaped)
  end

  def escape(_), do: raise(ArgumentError, "CSS.escape requires a string argument")

  # Position-dependent rules are resolved by matching on the front of the
  # charlist; everything after that point is escaped character by character.

  # The whole input is a single `-` (U+002D): it has to be escaped.
  defp escape_chars([?-]), do: ["\\-"]

  # The first character is a digit [0-9] (U+0030 to U+0039).
  defp escape_chars([digit | tail]) when digit in ?0..?9 do
    [as_code_point(digit) | Enum.map(tail, &escape_char/1)]
  end

  # The first character is `-` (U+002D) and the second one is a digit [0-9].
  defp escape_chars([?-, digit | tail]) when digit in ?0..?9 do
    ["-", as_code_point(digit) | Enum.map(tail, &escape_char/1)]
  end

  defp escape_chars(chars), do: Enum.map(chars, &escape_char/1)

  # NULL (U+0000) becomes the REPLACEMENT CHARACTER (U+FFFD).
  defp escape_char(0), do: <<0xFFFD::utf8>>

  # Control characters [\1-\1F] (U+0001 to U+001F) and U+007F are written as
  # code points.
  defp escape_char(char) when char in 0x0001..0x001F or char == 0x007F do
    as_code_point(char)
  end

  # Anything at or above U+0080, `-` (U+002D), `_` (U+005F), [0-9], [A-Z] and
  # [a-z] is emitted unchanged.
  defp escape_char(char)
       when char >= 0x0080 or char in [?-, ?_] or char in ?0..?9 or char in ?A..?Z or
              char in ?a..?z do
    <<char::utf8>>
  end

  # Every remaining character is prefixed with a backslash.
  # https://drafts.csswg.org/cssom/#escape-a-character
  defp escape_char(char), do: ["\\", <<char::utf8>>]

  # Backslash, the code point in hexadecimal, then a terminating space.
  # https://drafts.csswg.org/cssom/#escape-a-character-as-code-point
  defp as_code_point(char), do: ["\\", Integer.to_string(char, 16), " "]
end
79 changes: 79 additions & 0 deletions test/floki/css_escape_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
defmodule Floki.CSSEscapeTest do
  use ExUnit.Case, async: true

  alias Floki.CSSEscape

  doctest Floki.CSSEscape

  # Hexadecimal code points of the digits "0".."9", in order.
  @digit_hex_codes ~w(30 31 32 33 34 35 36 37 38 39)

  test "null character" do
    replacement = <<0xFFFD::utf8>>

    assert CSSEscape.escape(<<0>>) == replacement
    assert CSSEscape.escape("a\u0000") == "a\ufffd"
    assert CSSEscape.escape("\u0000b") == "\ufffdb"
    assert CSSEscape.escape("a\u0000b") == "a\ufffdb"
  end

  test "replacement character" do
    replacement = <<0xFFFD::utf8>>

    assert CSSEscape.escape(replacement) == replacement
    assert CSSEscape.escape("a\ufffd") == "a\ufffd"
    assert CSSEscape.escape("\ufffdb") == "\ufffdb"
    assert CSSEscape.escape("a\ufffdb") == "a\ufffdb"
  end

  test "invalid input" do
    assert_raise ArgumentError, fn -> CSSEscape.escape(nil) end
  end

  test "control characters" do
    input = <<0x01, 0x02, 0x1E, 0x1F>>

    assert CSSEscape.escape(input) == "\\1 \\2 \\1E \\1F "
  end

  test "leading digit" do
    for {hex, digit} <- Enum.with_index(@digit_hex_codes) do
      assert CSSEscape.escape("#{digit}a") == "\\#{hex} a"
    end
  end

  test "non-leading digit" do
    Enum.each(0..9, fn digit ->
      identifier = "a#{digit}b"
      assert CSSEscape.escape(identifier) == identifier
    end)
  end

  test "leading hyphen and digit" do
    for {hex, digit} <- Enum.with_index(@digit_hex_codes) do
      assert CSSEscape.escape("-#{digit}a") == "-\\#{hex} a"
    end
  end

  test "hyphens" do
    assert CSSEscape.escape("-") == "\\-"

    # Any other hyphen-led identifier without a digit is left untouched.
    for identifier <- ["-a", "--", "--a"] do
      assert CSSEscape.escape(identifier) == identifier
    end
  end

  test "non-ASCII and special characters" do
    emoji = "🤷🏻‍♂️-_©"
    assert CSSEscape.escape(emoji) == emoji

    # U+0080..U+009F pass through unchanged, while U+007F is written as a
    # code point.
    c1_controls = for code_point <- 0x80..0x9F, into: "", do: <<code_point::utf8>>
    assert CSSEscape.escape(<<0x7F>> <> c1_controls) == "\\7F " <> c1_controls

    latin1 = "\u00a0\u00a1\u00a2"
    assert CSSEscape.escape(latin1) == latin1
  end

  test "alphanumeric characters" do
    for identifier <- [
          "a0123456789b",
          "abcdefghijklmnopqrstuvwxyz",
          "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        ] do
      assert CSSEscape.escape(identifier) == identifier
    end
  end

  test "space and exclamation mark" do
    assert CSSEscape.escape(<<0x20, 0x21, 0x78, 0x79>>) == "\\ \\!xy"
  end

  test "unicode characters" do
    # astral symbol (U+1D306 TETRAGRAM FOR CENTRE)
    tetragram = <<0x1D306::utf8>>
    assert CSSEscape.escape(tetragram) == tetragram
  end
end
Loading