diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e9359e915a..8895aa3c31a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ full changeset diff at the end of each section. Current Trunk ------------- +- Add support for Unicode paths on Windows (#4995) + v112 ---- diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index 2758ee19897..84419e36687 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -9,6 +9,7 @@ set(support_SOURCES file.cpp istring.cpp path.cpp + pchar.cpp safe_integer.cpp threads.cpp utilities.cpp diff --git a/src/support/command-line.cpp b/src/support/command-line.cpp index 23ded0346d9..9d819fd9b4b 100644 --- a/src/support/command-line.cpp +++ b/src/support/command-line.cpp @@ -162,7 +162,33 @@ Options& Options::add_positional(const std::string& name, return *this; } -void Options::parse(int argc, const char* argv[]) { +// This function converts the platform-specific pchar arrays to char arrays, on +// windows by converting from UTF-16 to UTF-8, then calls parse2. +// +// Further processing is then performed on plain chars and strings. +// +// For arguments that represent paths, the reverse UTF-8 to UTF-16 encoding will +// be performed (on windows) by the fspath constructor. +// +// On non-windows this is just copying bytes around without conversion. 
+void Options::parse(int argc, const pchar* argv[]) { + std::vector> utf8_argv; + std::vector utf8_argv_ptrs; + + for (int i = 0; i != argc; ++i) { + pstring arg = pstring(argv[i]); + std::string utf8_arg = pstring_to_string(arg); + std::vector utf8_arg_vec(utf8_arg.begin(), utf8_arg.end()); + utf8_arg_vec.push_back('\0'); + auto ptr = utf8_arg_vec.data(); + utf8_argv.push_back(std::move(utf8_arg_vec)); + utf8_argv_ptrs.push_back(ptr); + } + + Options::parse2(argc, utf8_argv_ptrs.data()); +} + +void Options::parse2(int argc, const char* argv[]) { assert(argc > 0 && "expect at least program name as an argument"); size_t positionalsSeen = 0; auto dashes = [](const std::string& s) { diff --git a/src/support/command-line.h b/src/support/command-line.h index 354bbb58669..1922ad7a134 100644 --- a/src/support/command-line.h +++ b/src/support/command-line.h @@ -27,6 +27,7 @@ #include #include +#include "pchar.h" #include "wasm.h" namespace wasm { @@ -63,9 +64,11 @@ class Options { Options& add_positional(const std::string& name, Arguments arguments, const Action& action); - void parse(int argc, const char* argv[]); + void parse(int argc, const pchar* argv[]); private: + void parse2(int argc, const char* argv[]); + struct Option { std::string longName; std::string shortName; diff --git a/src/support/file.cpp b/src/support/file.cpp index cfd656391be..98d30239e0a 100644 --- a/src/support/file.cpp +++ b/src/support/file.cpp @@ -47,26 +47,26 @@ template<> std::string do_read_stdin::operator()() { } template -T wasm::read_file(const std::string& filename, Flags::BinaryOption binary) { - if (filename == "-") { +T wasm::read_file(const wasm::fspath& filename, Flags::BinaryOption binary) { + if (filename.stdpath() == "-") { return do_read_stdin{}(); } - BYN_TRACE("Loading '" << filename << "'...\n"); + BYN_TRACE("Loading '" << filename.stdpath() << "'...\n"); std::ifstream infile; std::ios_base::openmode flags = std::ifstream::in; if (binary == Flags::Binary) { flags |= 
std::ifstream::binary; } - infile.open(filename, flags); + infile.open(filename.stdpath(), flags); if (!infile.is_open()) { - Fatal() << "Failed opening '" << filename << "'"; + Fatal() << "Failed opening '" << filename.stdpath() << "'"; } infile.seekg(0, std::ios::end); std::streampos insize = infile.tellg(); if (uint64_t(insize) >= std::numeric_limits::max()) { // Building a 32-bit executable where size_t == 32 bits, we are not able to // create strings larger than 2^32 bytes in length, so must abort here. - Fatal() << "Failed opening '" << filename + Fatal() << "Failed opening '" << filename.stdpath() << "': Input file too large: " << insize << " bytes. Try rebuilding in 64-bit mode."; } @@ -87,47 +87,50 @@ T wasm::read_file(const std::string& filename, Flags::BinaryOption binary) { return input; } -std::string wasm::read_possible_response_file(const std::string& input) { - if (input.size() == 0 || input[0] != '@') { - return input; +std::string wasm::read_possible_response_file(const wasm::fspath& input) { + auto input_str = input.stdpath().native(); + if (input_str.size() == 0 || input_str[0] != '@') { + return wasm::pstring_to_string(input.stdpath().native()); } - return wasm::read_file(input.substr(1), Flags::Text); + auto input_substr = input_str.substr(1); + auto real_path = wasm::fspath::from_pstring(input_substr); + return wasm::read_file(real_path, Flags::Text); } // Explicit instantiations for the explicit specializations. 
-template std::string wasm::read_file<>(const std::string&, Flags::BinaryOption); -template std::vector wasm::read_file<>(const std::string&, +template std::string wasm::read_file<>(const wasm::fspath&, Flags::BinaryOption); +template std::vector wasm::read_file<>(const wasm::fspath&, Flags::BinaryOption); -wasm::Output::Output(const std::string& filename, Flags::BinaryOption binary) +wasm::Output::Output(const wasm::fspath& filename, Flags::BinaryOption binary) : outfile(), out([this, filename, binary]() { // Ensure a single return at the very end, to avoid clang-tidy warnings // about the types of different returns here. std::streambuf* buffer; - if (filename == "-" || filename.empty()) { + if (filename.stdpath() == "-" || filename.stdpath().empty()) { buffer = std::cout.rdbuf(); } else { - BYN_TRACE("Opening '" << filename << "'\n"); + BYN_TRACE("Opening '" << filename.stdpath() << "'\n"); auto flags = std::ofstream::out | std::ofstream::trunc; if (binary == Flags::Binary) { flags |= std::ofstream::binary; } - outfile.open(filename, flags); + outfile.open(filename.stdpath(), flags); if (!outfile.is_open()) { - Fatal() << "Failed opening '" << filename << "'"; + Fatal() << "Failed opening '" << filename.stdpath() << "'"; } buffer = outfile.rdbuf(); } return buffer; }()) {} -void wasm::copy_file(std::string input, std::string output) { - std::ifstream src(input, std::ios::binary); - std::ofstream dst(output, std::ios::binary); +void wasm::copy_file(wasm::fspath input, wasm::fspath output) { + std::ifstream src(input.stdpath(), std::ios::binary); + std::ofstream dst(output.stdpath(), std::ios::binary); dst << src.rdbuf(); } -size_t wasm::file_size(std::string filename) { - std::ifstream infile(filename, std::ifstream::ate | std::ifstream::binary); +size_t wasm::file_size(wasm::fspath filename) { + std::ifstream infile(filename.stdpath(), std::ifstream::ate | std::ifstream::binary); return infile.tellg(); } diff --git a/src/support/file.h b/src/support/file.h index 
ae91831c98f..93d716cbc7d 100644 --- a/src/support/file.h +++ b/src/support/file.h @@ -26,6 +26,8 @@ #include #include +#include "pchar.h" + namespace wasm { namespace Flags { @@ -35,23 +37,23 @@ enum BinaryOption { Binary, Text }; std::vector read_stdin(); template -T read_file(const std::string& filename, Flags::BinaryOption binary); +T read_file(const wasm::fspath& filename, Flags::BinaryOption binary); // Declare the valid explicit specializations. -extern template std::string read_file<>(const std::string&, +extern template std::string read_file<>(const wasm::fspath&, Flags::BinaryOption); -extern template std::vector read_file<>(const std::string&, +extern template std::vector read_file<>(const wasm::fspath&, Flags::BinaryOption); // Given a string which may be a response file (i.e., a filename starting // with "@"), if it is a response file read it and return that, or if it // is not a response file, return it as is. -std::string read_possible_response_file(const std::string&); +std::string read_possible_response_file(const wasm::fspath&); class Output { public: // An empty filename or "-" will open stdout instead. 
- Output(const std::string& filename, Flags::BinaryOption binary); + Output(const wasm::fspath& filename, Flags::BinaryOption binary); ~Output() = default; template std::ostream& operator<<(const T& v) { return out << v; } @@ -70,10 +72,10 @@ class Output { }; // Copies a file to another file -void copy_file(std::string input, std::string output); +void copy_file(wasm::fspath input, wasm::fspath output); // Retusn the size of a file -size_t file_size(std::string filename); +size_t file_size(wasm::fspath filename); } // namespace wasm diff --git a/src/support/main.h b/src/support/main.h new file mode 100644 index 00000000000..636fed24bac --- /dev/null +++ b/src/support/main.h @@ -0,0 +1,36 @@ +/* + * Copyright 2016 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Cross-platform definition of main. +// +// Users will write main like: +// +// int BYN_MAIN(int argc, const pchar* argv[]) { ... 
} +// + +#ifndef wasm_support_main_h +#define wasm_support_main_h + +#include "support/pchar.h" + +#ifdef _WIN32 +#define BYN_MAIN wmain +#else +#define BYN_MAIN main +#endif + +#endif // wasm_support_main_h diff --git a/src/support/pchar.cpp b/src/support/pchar.cpp new file mode 100644 index 00000000000..2cf250db1f8 --- /dev/null +++ b/src/support/pchar.cpp @@ -0,0 +1,94 @@ +/* + * Copyright 2015 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pchar.h" + +namespace wasm { + +#ifdef _WIN32 + +#include "windows.h" + +// The conversion functions here will always succeed, with invalid chars +// converted to replacement chars. If there are bugs here they should manifest +// in file-not-found errors and not something worse. 
+ +wasm::pstring string_to_pstring(const std::string& s) { + auto inptr = s.data(); + auto inlen = s.size(); + auto outlen = MultiByteToWideChar(CP_UTF8, 0, inptr, inlen, NULL, 0); + auto outstr = wasm::pstring(outlen, 0); + auto outptr = outstr.data(); + MultiByteToWideChar(CP_UTF8, 0, inptr, inlen, outptr, outlen); + return outstr; +} + +std::string pstring_to_string(const wasm::pstring& s) { + auto inptr = s.data(); + auto inlen = s.size(); + auto outlen = WideCharToMultiByte(CP_UTF8, 0, inptr, inlen, NULL, 0, NULL, NULL); + auto outstr = std::string(outlen, 0); + auto outptr = outstr.data(); + WideCharToMultiByte(CP_UTF8, 0, inptr, inlen, outptr, outlen, NULL, NULL); + return outstr; +} + +#else + +wasm::pstring string_to_pstring(const std::string& s) { + return wasm::pstring(s); +} + +std::string pstring_to_string(const wasm::pstring& s) { + return std::string(s); +} + +#endif + +std::filesystem::path string_to_path(const std::string& s) { + auto pstring = wasm::string_to_pstring(s); + return std::filesystem::path(pstring); +} + +fspath::fspath(const std::string& path) { + inner_path = string_to_path(path); +} + +fspath::fspath(const char path[]) { + inner_path = string_to_path(std::string(path)); +} + +fspath::fspath(const wasm::fspath& path) { + inner_path = path.inner_path; +} + +fspath::fspath(const std::filesystem::path& path) { + inner_path = path; +} + +wasm::fspath fspath::from_pstring(const wasm::pstring& path) { + return fspath(std::filesystem::path(path)); +} + +wasm::fspath fspath::operator=(const wasm::fspath& path) const { + return wasm::fspath(path); +} + +const std::filesystem::path& fspath::stdpath() const { + return inner_path; +} + +} // namespace wasm diff --git a/src/support/pchar.h b/src/support/pchar.h new file mode 100644 index 00000000000..80d00baf5be --- /dev/null +++ b/src/support/pchar.h @@ -0,0 +1,102 @@ +/* + * Copyright 2015 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// The platform char / string type. +// +// Used entirely for managing unicode-correct paths on windows. +// + +#ifndef wasm_support_pchar_h +#define wasm_support_pchar_h + +#include + +namespace wasm { + +// The platform string type. +// +// basic_string<wchar_t> on Windows, basic_string<char> elsewhere +typedef std::filesystem::path::string_type pstring; +typedef std::filesystem::path::value_type pchar; + +// Conversion from string to pstring. +// +// On windows this performs a UTF-8 to UTF-16 conversion, on the assumption that +// the incoming string was previously created through pstring_to_string. +// +// If a non-UTF-8 string is passed as input, then the output will contain +// replacement characters. +// +// On non-windows this just copies the string. +pstring string_to_pstring(const std::string& s); + +// Conversion from pstring to string. +// +// On windows this performs a UTF-16 to UTF-8 conversion, on the assumption that +// the pstring was received from the wmain function as UTF-16. +// +// If a non-UTF-16 string is passed as input, then the output will contain +// replacement characters. +// +// On non-windows this just copies the string. +std::string pstring_to_string(const pstring& s); + +// A light wrapper around std::filesystem::path +// +// This class only exists to avoid silent errors: the std::string constructor performs +// conversion from UTF-8 on windows where the std::filesystem::path constructor +// silently does not. 
+// +// Using this in APIs instead of std::filesystem::path allows paths to be +// seamlessly and correctly constructed from strings without the possibility of +// silently forgetting a conversion from UTF-8. +// +// The above is true as long as all paths encoded as strings are UTF-8, which is +// true on windows if all CLI arguments are processed through the Options::parse +// method. +class fspath { +public: + fspath(): inner_path() { } + fspath(const std::string& path); + fspath(const char path[]); + fspath(const wasm::fspath& path); + + // This exists to satisfy one conversion in read_possible_response_file. + // + // We can't have a constructor from pstring - on non-windows pstring + // and string are the same type. + // + // We could also make the private constructor from filesystem::path public, + // and the compiler would use it as an implicit conversion from pstring; but + // because filesystem::path also has a lossy conversion from string that + // motivates the existence of this class, we choose to hide that conversion + // and use this explicit static method. 
+ static wasm::fspath from_pstring(const wasm::pstring& path); + + wasm::fspath operator=(const wasm::fspath& path) const; + + const std::filesystem::path& stdpath() const; + +private: + fspath(const std::filesystem::path& path); + + std::filesystem::path inner_path; +}; + +} // namespace wasm + +#endif // wasm_support_pchar_h diff --git a/src/tools/wasm-as.cpp b/src/tools/wasm-as.cpp index cc4f6fda2f9..941085e4f72 100644 --- a/src/tools/wasm-as.cpp +++ b/src/tools/wasm-as.cpp @@ -20,6 +20,7 @@ #include "support/colors.h" #include "support/file.h" +#include "support/main.h" #include "wasm-io.h" #include "wasm-s-parser.h" #include "wasm-validator.h" @@ -29,7 +30,7 @@ using namespace wasm; -int main(int argc, const char* argv[]) { +int BYN_MAIN(int argc, const pchar* argv[]) { bool debugInfo = false; std::string symbolMap; std::string sourceMapFilename; diff --git a/src/tools/wasm-ctor-eval.cpp b/src/tools/wasm-ctor-eval.cpp index 40abe2f76f0..7925a1869f8 100644 --- a/src/tools/wasm-ctor-eval.cpp +++ b/src/tools/wasm-ctor-eval.cpp @@ -33,6 +33,7 @@ #include "pass.h" #include "support/colors.h" #include "support/file.h" +#include "support/main.h" #include "support/small_set.h" #include "support/string.h" #include "tool-options.h" @@ -884,7 +885,7 @@ static bool canEval(Module& wasm) { // main // -int main(int argc, const char* argv[]) { +int BYN_MAIN(int argc, const pchar* argv[]) { Name entry; std::vector passes; bool emitBinary = true; diff --git a/src/tools/wasm-dis.cpp b/src/tools/wasm-dis.cpp index f9f30335963..b6efba9902a 100644 --- a/src/tools/wasm-dis.cpp +++ b/src/tools/wasm-dis.cpp @@ -20,13 +20,14 @@ #include "support/colors.h" #include "support/file.h" +#include "support/main.h" #include "wasm-io.h" #include "tool-options.h" using namespace wasm; -int main(int argc, const char* argv[]) { +int BYN_MAIN(int argc, const pchar* argv[]) { std::string sourceMapFilename; const std::string WasmDisOption = "wasm-dis options"; diff --git 
a/src/tools/wasm-emscripten-finalize.cpp b/src/tools/wasm-emscripten-finalize.cpp index 0f63e8112d1..debcfbeec9e 100644 --- a/src/tools/wasm-emscripten-finalize.cpp +++ b/src/tools/wasm-emscripten-finalize.cpp @@ -26,6 +26,7 @@ #include "support/colors.h" #include "support/debug.h" #include "support/file.h" +#include "support/main.h" #include "tool-options.h" #include "wasm-binary.h" #include "wasm-emscripten.h" @@ -36,7 +37,7 @@ using namespace wasm; -int main(int argc, const char* argv[]) { +int BYN_MAIN(int argc, const pchar* argv[]) { const uint64_t INVALID_BASE = -1; std::string infile; diff --git a/src/tools/wasm-fuzz-types.cpp b/src/tools/wasm-fuzz-types.cpp index 0e40e7a15ea..d07026c295d 100644 --- a/src/tools/wasm-fuzz-types.cpp +++ b/src/tools/wasm-fuzz-types.cpp @@ -20,6 +20,7 @@ #include #include "support/command-line.h" +#include "support/main.h" #include "tools/fuzzing/heap-types.h" #include "tools/fuzzing/random.h" #include "wasm-type-printing.h" @@ -491,7 +492,7 @@ void Fuzzer::checkCanonicalization() { } // namespace wasm -int main(int argc, const char* argv[]) { +int BYN_MAIN(int argc, const wasm::pchar* argv[]) { using namespace wasm; const std::string WasmFuzzTypesOption = "wasm-fuzz-types options"; diff --git a/src/tools/wasm-metadce.cpp b/src/tools/wasm-metadce.cpp index 029d4aae8ba..56ffee742af 100644 --- a/src/tools/wasm-metadce.cpp +++ b/src/tools/wasm-metadce.cpp @@ -33,6 +33,7 @@ #include "support/colors.h" #include "support/file.h" #include "support/json.h" +#include "support/main.h" #include "tool-options.h" #include "wasm-builder.h" #include "wasm-io.h" @@ -416,7 +417,7 @@ struct MetaDCEGraph { // main // -int main(int argc, const char* argv[]) { +int BYN_MAIN(int argc, const pchar* argv[]) { Name entry; std::vector passes; bool emitBinary = true; diff --git a/src/tools/wasm-opt.cpp b/src/tools/wasm-opt.cpp index 0d279dc0864..cbc4a4375e0 100644 --- a/src/tools/wasm-opt.cpp +++ b/src/tools/wasm-opt.cpp @@ -31,6 +31,7 @@ #include 
"support/command-line.h" #include "support/debug.h" #include "support/file.h" +#include "support/main.h" #include "wasm-binary.h" #include "wasm-interpreter.h" #include "wasm-io.h" @@ -73,7 +74,7 @@ willRemoveDebugInfo(const std::vector& passes) { // main // -int main(int argc, const char* argv[]) { +int BYN_MAIN(int argc, const pchar* argv[]) { Name entry; bool emitBinary = true; bool converge = false; diff --git a/src/tools/wasm-reduce.cpp b/src/tools/wasm-reduce.cpp index 078eca64813..77050f03714 100644 --- a/src/tools/wasm-reduce.cpp +++ b/src/tools/wasm-reduce.cpp @@ -36,6 +36,7 @@ #include "support/command-line.h" #include "support/file.h" #include "support/hash.h" +#include "support/main.h" #include "support/path.h" #include "support/timing.h" #include "tool-options.h" @@ -1183,10 +1184,10 @@ struct Reducer // main // -int main(int argc, const char* argv[]) { +int BYN_MAIN(int argc, const pchar* argv[]) { std::string input, test, working, command; // By default, look for binaries alongside our own binary. 
- std::string binDir = Path::getDirName(argv[0]); + std::string binDir = Path::getDirName(pstring_to_string(argv[0])); bool binary = true, deNan = false, verbose = false, debugInfo = false, force = false; diff --git a/src/tools/wasm-shell.cpp b/src/tools/wasm-shell.cpp index 70f52a0380f..54ddb36d93e 100644 --- a/src/tools/wasm-shell.cpp +++ b/src/tools/wasm-shell.cpp @@ -27,6 +27,7 @@ #include "shell-interface.h" #include "support/command-line.h" #include "support/file.h" +#include "support/main.h" #include "wasm-interpreter.h" #include "wasm-s-parser.h" #include "wasm-validator.h" @@ -396,7 +397,7 @@ class Shell { } }; -int main(int argc, const char* argv[]) { +int BYN_MAIN(int argc, const pchar* argv[]) { Name entry; std::set skipped; diff --git a/src/tools/wasm-split/split-options.cpp b/src/tools/wasm-split/split-options.cpp index b166b575cc6..63decfb74e3 100644 --- a/src/tools/wasm-split/split-options.cpp +++ b/src/tools/wasm-split/split-options.cpp @@ -436,7 +436,7 @@ bool WasmSplitOptions::validate() { return valid; } -void WasmSplitOptions::parse(int argc, const char* argv[]) { +void WasmSplitOptions::parse(int argc, const pchar* argv[]) { ToolOptions::parse(argc, argv); // Since --quiet is defined in ToolOptions but --verbose is defined here, // --quiet doesn't know to unset --verbose. Fix it up here. 
diff --git a/src/tools/wasm-split/split-options.h b/src/tools/wasm-split/split-options.h index 6aa5b001117..40c22ffbabb 100644 --- a/src/tools/wasm-split/split-options.h +++ b/src/tools/wasm-split/split-options.h @@ -91,7 +91,7 @@ struct WasmSplitOptions : ToolOptions { Arguments arguments, const Action& action); bool validate(); - void parse(int argc, const char* argv[]); + void parse(int argc, const pchar* argv[]); }; } // namespace wasm diff --git a/src/tools/wasm-split/wasm-split.cpp b/src/tools/wasm-split/wasm-split.cpp index bea3ddce71b..1288849cf7b 100644 --- a/src/tools/wasm-split/wasm-split.cpp +++ b/src/tools/wasm-split/wasm-split.cpp @@ -22,6 +22,7 @@ #include "ir/module-splitting.h" #include "ir/names.h" #include "support/file.h" +#include "support/main.h" #include "support/name.h" #include "support/path.h" #include "support/utilities.h" @@ -465,7 +466,7 @@ void printReadableProfile(const WasmSplitOptions& options) { } // anonymous namespace -int main(int argc, const char* argv[]) { +int BYN_MAIN(int argc, const pchar* argv[]) { WasmSplitOptions options; options.parse(argc, argv); diff --git a/src/tools/wasm2js.cpp b/src/tools/wasm2js.cpp index 79eee56a0a6..be058fca187 100644 --- a/src/tools/wasm2js.cpp +++ b/src/tools/wasm2js.cpp @@ -24,6 +24,7 @@ #include "support/colors.h" #include "support/command-line.h" #include "support/file.h" +#include "support/main.h" #include "wasm-s-parser.h" using namespace cashew; @@ -877,7 +878,7 @@ void AssertionEmitter::emit() { // Main -int main(int argc, const char* argv[]) { +int BYN_MAIN(int argc, const pchar* argv[]) { Wasm2JSBuilder::Flags flags; const std::string Wasm2JSOption = "wasm2js options"; diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 4d9f4f5aed9..b691a8dda77 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -29,6 +29,7 @@ #include "ir/module-utils.h" #include "parsing.h" #include "support/debug.h" +#include "support/pchar.h" #include "wasm-builder.h" #include "wasm-traversal.h" 
#include "wasm-validator.h" @@ -1296,7 +1297,7 @@ class WasmBinaryWriter { sourceMap = set; sourceMapUrl = url; } - void setSymbolMap(std::string set) { symbolMap = set; } + void setSymbolMap(wasm::fspath set) { symbolMap = set; } void write(); void writeHeader(); @@ -1393,7 +1394,7 @@ class WasmBinaryWriter { std::ostream* sourceMap = nullptr; std::string sourceMapUrl; - std::string symbolMap; + wasm::fspath symbolMap; MixedArena allocator; diff --git a/src/wasm-io.h b/src/wasm-io.h index ae66c39320d..0be6d5902c6 100644 --- a/src/wasm-io.h +++ b/src/wasm-io.h @@ -58,17 +58,17 @@ class ModuleReader : public ModuleIOBase { } // read text - void readText(std::string filename, Module& wasm); + void readText(wasm::fspath filename, Module& wasm); // read binary - void readBinary(std::string filename, + void readBinary(wasm::fspath filename, Module& wasm, - std::string sourceMapFilename = ""); + wasm::fspath sourceMapFilename = ""); // read text or binary, checking the contents for what it is. If `filename` is // empty, read from stdin. void - read(std::string filename, Module& wasm, std::string sourceMapFilename = ""); + read(wasm::fspath filename, Module& wasm, wasm::fspath sourceMapFilename = ""); // check whether a file is a wasm binary - bool isBinaryFile(std::string filename); + bool isBinaryFile(wasm::fspath filename); private: bool DWARF = false; @@ -77,11 +77,11 @@ class ModuleReader : public ModuleIOBase { bool skipFunctionBodies = false; - void readStdin(Module& wasm, std::string sourceMapFilename); + void readStdin(Module& wasm, wasm::fspath sourceMapFilename); void readBinaryData(std::vector& input, Module& wasm, - std::string sourceMapFilename); + wasm::fspath sourceMapFilename); }; class ModuleWriter : public ModuleIOBase { @@ -90,8 +90,8 @@ class ModuleWriter : public ModuleIOBase { // TODO: Remove `emitModuleName`. 
See the comment in wasm-binary.h bool emitModuleName = false; - std::string symbolMap; - std::string sourceMapFilename; + wasm::fspath symbolMap; + wasm::fspath sourceMapFilename; std::string sourceMapUrl; public: @@ -100,8 +100,8 @@ class ModuleWriter : public ModuleIOBase { ModuleWriter() { setDebugInfo(false); } void setBinary(bool binary_) { binary = binary_; } - void setSymbolMap(std::string symbolMap_) { symbolMap = symbolMap_; } - void setSourceMapFilename(std::string sourceMapFilename_) { + void setSymbolMap(wasm::fspath symbolMap_) { symbolMap = symbolMap_; } + void setSourceMapFilename(wasm::fspath sourceMapFilename_) { sourceMapFilename = sourceMapFilename_; } void setSourceMapUrl(std::string sourceMapUrl_) { @@ -111,15 +111,15 @@ class ModuleWriter : public ModuleIOBase { // write text void writeText(Module& wasm, Output& output); - void writeText(Module& wasm, std::string filename); + void writeText(Module& wasm, wasm::fspath filename); // write binary void writeBinary(Module& wasm, Output& output); - void writeBinary(Module& wasm, std::string filename); + void writeBinary(Module& wasm, wasm::fspath filename); // write text or binary, defaulting to binary unless setBinary(false), // and unless there is no output file (in which case we write text // to stdout). 
void write(Module& wasm, Output& output); - void write(Module& wasm, std::string filename); + void write(Module& wasm, wasm::fspath filename); }; } // namespace wasm diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index b616b5a31d2..0fcc1c487f4 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -70,7 +70,7 @@ void WasmBinaryWriter::write() { if (sourceMap && !sourceMapUrl.empty()) { writeSourceMapUrl(); } - if (symbolMap.size() > 0) { + if (symbolMap.stdpath().native().size() > 0) { writeSymbolMap(); } @@ -1108,7 +1108,7 @@ void WasmBinaryWriter::writeSourceMapUrl() { } void WasmBinaryWriter::writeSymbolMap() { - std::ofstream file(symbolMap); + std::ofstream file(symbolMap.stdpath()); auto write = [&](Function* func) { file << getFunctionIndex(func->name) << ":" << func->name.str << std::endl; }; diff --git a/src/wasm/wasm-io.cpp b/src/wasm/wasm-io.cpp index 90f267e9be4..27344a89774 100644 --- a/src/wasm/wasm-io.cpp +++ b/src/wasm/wasm-io.cpp @@ -51,15 +51,15 @@ static void readTextData(std::string& input, Module& wasm, IRProfile profile) { } } -void ModuleReader::readText(std::string filename, Module& wasm) { - BYN_TRACE("reading text from " << filename << "\n"); +void ModuleReader::readText(wasm::fspath filename, Module& wasm) { + BYN_TRACE("reading text from " << filename.stdpath() << "\n"); auto input(read_file(filename, Flags::Text)); readTextData(input, wasm, profile); } void ModuleReader::readBinaryData(std::vector& input, Module& wasm, - std::string sourceMapFilename) { + wasm::fspath sourceMapFilename) { std::unique_ptr sourceMapStream; // Assume that the wasm has had its initial features applied, and use those // while parsing. 
@@ -67,9 +67,9 @@ void ModuleReader::readBinaryData(std::vector& input, parser.setDebugInfo(debugInfo); parser.setDWARF(DWARF); parser.setSkipFunctionBodies(skipFunctionBodies); - if (sourceMapFilename.size()) { + if (sourceMapFilename.stdpath().native().size()) { sourceMapStream = make_unique(); - sourceMapStream->open(sourceMapFilename); + sourceMapStream->open(sourceMapFilename.stdpath()); parser.setDebugLocations(sourceMapStream.get()); } parser.read(); @@ -78,18 +78,18 @@ void ModuleReader::readBinaryData(std::vector& input, } } -void ModuleReader::readBinary(std::string filename, +void ModuleReader::readBinary(wasm::fspath filename, Module& wasm, - std::string sourceMapFilename) { - BYN_TRACE("reading binary from " << filename << "\n"); + wasm::fspath sourceMapFilename) { + BYN_TRACE("reading binary from " << filename.stdpath() << "\n"); auto input(read_file>(filename, Flags::Binary)); readBinaryData(input, wasm, sourceMapFilename); } -bool ModuleReader::isBinaryFile(std::string filename) { +bool ModuleReader::isBinaryFile(wasm::fspath filename) { std::ifstream infile; std::ios_base::openmode flags = std::ifstream::in | std::ifstream::binary; - infile.open(filename, flags); + infile.open(filename.stdpath(), flags); char buffer[4] = {1, 2, 3, 4}; infile.read(buffer, 4); infile.close(); @@ -97,11 +97,11 @@ bool ModuleReader::isBinaryFile(std::string filename) { buffer[3] == 'm'; } -void ModuleReader::read(std::string filename, +void ModuleReader::read(wasm::fspath filename, Module& wasm, - std::string sourceMapFilename) { + wasm::fspath sourceMapFilename) { // empty filename or "-" means read from stdin - if (!filename.size() || filename == "-") { + if (!filename.stdpath().native().size() || filename.stdpath() == "-") { readStdin(wasm, sourceMapFilename); return; } @@ -109,7 +109,7 @@ void ModuleReader::read(std::string filename, readBinary(filename, wasm, sourceMapFilename); } else { // default to text - if (sourceMapFilename.size()) { + if 
(sourceMapFilename.stdpath().native().size()) { std::cerr << "Binaryen ModuleReader::read() - source map filename " "provided, but file appears to not be binary\n"; } @@ -119,7 +119,7 @@ void ModuleReader::read(std::string filename, // TODO: reading into a vector then copying into a string is unnecessarily // inefficient. It would be better to read just once into a stringstream. -void ModuleReader::readStdin(Module& wasm, std::string sourceMapFilename) { +void ModuleReader::readStdin(Module& wasm, wasm::fspath sourceMapFilename) { std::vector input = read_stdin(); if (input.size() >= 4 && input[0] == '\0' && input[1] == 'a' && input[2] == 's' && input[3] == 'm') { @@ -140,8 +140,8 @@ void ModuleWriter::writeText(Module& wasm, Output& output) { output.getStream() << wasm; } -void ModuleWriter::writeText(Module& wasm, std::string filename) { - BYN_TRACE("writing text to " << filename << "\n"); +void ModuleWriter::writeText(Module& wasm, wasm::fspath filename) { + BYN_TRACE("writing text to " << filename.stdpath() << "\n"); Output output(filename, Flags::Text); writeText(wasm, output); } @@ -155,12 +155,12 @@ void ModuleWriter::writeBinary(Module& wasm, Output& output) { writer.setEmitModuleName(true); } std::unique_ptr sourceMapStream; - if (sourceMapFilename.size()) { + if (sourceMapFilename.stdpath().native().size()) { sourceMapStream = make_unique(); - sourceMapStream->open(sourceMapFilename); + sourceMapStream->open(sourceMapFilename.stdpath()); writer.setSourceMap(sourceMapStream.get(), sourceMapUrl); } - if (symbolMap.size() > 0) { + if (symbolMap.stdpath().native().size() > 0) { writer.setSymbolMap(symbolMap); } writer.write(); @@ -170,8 +170,8 @@ void ModuleWriter::writeBinary(Module& wasm, Output& output) { } } -void ModuleWriter::writeBinary(Module& wasm, std::string filename) { - BYN_TRACE("writing binary to " << filename << "\n"); +void ModuleWriter::writeBinary(Module& wasm, wasm::fspath filename) { + BYN_TRACE("writing binary to " << filename.stdpath() 
<< "\n"); Output output(filename, Flags::Binary); writeBinary(wasm, output); } @@ -184,8 +184,8 @@ void ModuleWriter::write(Module& wasm, Output& output) { } } -void ModuleWriter::write(Module& wasm, std::string filename) { - if (binary && filename.size() > 0) { +void ModuleWriter::write(Module& wasm, wasm::fspath filename) { + if (binary && filename.stdpath().native().size() > 0) { writeBinary(wasm, filename); } else { writeText(wasm, filename); diff --git "a/test/unicode_\342\235\244\357\270\217.c" "b/test/unicode_\342\235\244\357\270\217.c" new file mode 100644 index 00000000000..ce0cca4630e --- /dev/null +++ "b/test/unicode_\342\235\244\357\270\217.c" @@ -0,0 +1,6 @@ +#include + +int main() { + printf("hello, world!\n"); + return 0; +} diff --git "a/test/unicode_\342\235\244\357\270\217.txt" "b/test/unicode_\342\235\244\357\270\217.txt" new file mode 100644 index 00000000000..30527bf5b33 --- /dev/null +++ "b/test/unicode_\342\235\244\357\270\217.txt" @@ -0,0 +1,2 @@ +hello, world! + diff --git "a/test/unicode_\342\235\244\357\270\217.wast" "b/test/unicode_\342\235\244\357\270\217.wast" new file mode 100644 index 00000000000..ee42dc27b97 --- /dev/null +++ "b/test/unicode_\342\235\244\357\270\217.wast" @@ -0,0 +1,11 @@ +(module + (type $i32_i32_=>_i32 (func (param i32 i32) (result i32))) + (memory $0 256 256) + (export "add" (func $add)) + (func $add (type $i32_i32_=>_i32) (param $x i32) (param $y i32) (result i32) + (i32.add + (local.get $x) + (local.get $y) + ) + ) +) diff --git "a/test/unicode_\342\235\244\357\270\217.wast.from-wast" "b/test/unicode_\342\235\244\357\270\217.wast.from-wast" new file mode 100644 index 00000000000..ee42dc27b97 --- /dev/null +++ "b/test/unicode_\342\235\244\357\270\217.wast.from-wast" @@ -0,0 +1,11 @@ +(module + (type $i32_i32_=>_i32 (func (param i32 i32) (result i32))) + (memory $0 256 256) + (export "add" (func $add)) + (func $add (type $i32_i32_=>_i32) (param $x i32) (param $y i32) (result i32) + (i32.add + (local.get $x) + 
(local.get $y) + ) + ) +) diff --git "a/test/unicode_\342\235\244\357\270\217.wast.fromBinary" "b/test/unicode_\342\235\244\357\270\217.wast.fromBinary" new file mode 100644 index 00000000000..1518d9cc43d --- /dev/null +++ "b/test/unicode_\342\235\244\357\270\217.wast.fromBinary" @@ -0,0 +1,12 @@ +(module + (type $i32_i32_=>_i32 (func (param i32 i32) (result i32))) + (memory $0 256 256) + (export "add" (func $add)) + (func $add (type $i32_i32_=>_i32) (param $x i32) (param $y i32) (result i32) + (i32.add + (local.get $x) + (local.get $y) + ) + ) +) + diff --git "a/test/unicode_\342\235\244\357\270\217.wast.fromBinary.noDebugInfo" "b/test/unicode_\342\235\244\357\270\217.wast.fromBinary.noDebugInfo" new file mode 100644 index 00000000000..fe33936c67a --- /dev/null +++ "b/test/unicode_\342\235\244\357\270\217.wast.fromBinary.noDebugInfo" @@ -0,0 +1,12 @@ +(module + (type $i32_i32_=>_i32 (func (param i32 i32) (result i32))) + (memory $0 256 256) + (export "add" (func $0)) + (func $0 (type $i32_i32_=>_i32) (param $0 i32) (param $1 i32) (result i32) + (i32.add + (local.get $0) + (local.get $1) + ) + ) +) + diff --git "a/test/unicode_\342\235\244\357\270\217.wat" "b/test/unicode_\342\235\244\357\270\217.wat" new file mode 100644 index 00000000000..680ee809aba --- /dev/null +++ "b/test/unicode_\342\235\244\357\270\217.wat" @@ -0,0 +1,11 @@ +(module + (type $i32_i32_=>_i32 (func (param i32 i32) (result i32))) + (memory $0 256 256) + (export "add" (func $add)) + (func $add (param $x i32) (param $y i32) (result i32) + (i32.add + (local.get $x) + (local.get $y) + ) + ) +)