Avi Drissman | 6459548 | 2022-09-14 20:52:29 | [diff] [blame] | 1 | // Copyright 2014 The Chromium Authors |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "net/base/filename_util.h" |
| 6 | |
Matt Giuca | ee5e87f | 2018-06-07 04:18:41 | [diff] [blame] | 7 | #include <set> |
| 8 | |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 9 | #include "base/files/file_path.h" |
thestig | d8df033 | 2014-09-04 06:33:29 | [diff] [blame] | 10 | #include "base/files/file_util.h" |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 11 | #include "base/path_service.h" |
Ryan Hamilton | 7f3bd3d | 2022-04-23 00:07:39 | [diff] [blame] | 12 | #include "base/strings/escape.h" |
Peter Kasting | abc2bc3 | 2023-10-27 22:30:09 | [diff] [blame] | 13 | #include "base/strings/string_number_conversions.h" |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 14 | #include "base/strings/string_util.h" |
| 15 | #include "base/strings/sys_string_conversions.h" |
| 16 | #include "base/strings/utf_string_conversions.h" |
| 17 | #include "base/threading/thread_restrictions.h" |
Fabrice de Gans-Riberi | 7de4737 | 2018-05-08 20:23:47 | [diff] [blame] | 18 | #include "build/build_config.h" |
[email protected] | 37ed1b5 | 2014-05-09 17:09:00 | [diff] [blame] | 19 | #include "net/base/filename_util_internal.h" |
[email protected] | 3dc3a91 | 2014-04-29 20:58:12 | [diff] [blame] | 20 | #include "net/base/net_string_util.h" |
Tommy Li | 3187fae | 2019-11-14 20:04:22 | [diff] [blame] | 21 | #include "net/base/url_util.h" |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 22 | #include "net/http/http_content_disposition.h" |
| 23 | #include "url/gurl.h" |
| 24 | |
| 25 | namespace net { |
| 26 | |
// Scheme-plus-empty-host prefix that is prepended to a path to form a file
// URL.
static constexpr char kFileURLPrefix[] = "file:///";
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 29 | |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 30 | GURL FilePathToFileURL(const base::FilePath& path) { |
| 31 | // Produce a URL like "file:///C:/foo" for a regular file, or |
| 32 | // "file://///server/path" for UNC. The URL canonicalizer will fix up the |
| 33 | // latter case to be the canonical UNC form: "file://server/path" |
Peter Kasting | 72c5651 | 2021-01-27 23:45:53 | [diff] [blame] | 34 | std::string url_string(kFileURLPrefix); |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 35 | |
Joel Hockey | 6f7283f8 | 2020-11-10 01:11:00 | [diff] [blame] | 36 | // GURL() strips some whitespace and trailing control chars which are valid |
| 37 | // in file paths. It also interprets chars such as `%;#?` and maybe `\`, so we |
| 38 | // must percent encode these first. Reserve max possible length up front. |
Peter Kasting | 72c5651 | 2021-01-27 23:45:53 | [diff] [blame] | 39 | std::string utf8_path = path.AsUTF8Unsafe(); |
| 40 | url_string.reserve(url_string.size() + (3 * utf8_path.size())); |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 41 | |
Peter Kasting | 72c5651 | 2021-01-27 23:45:53 | [diff] [blame] | 42 | for (auto c : utf8_path) { |
Joel Hockey | 6f7283f8 | 2020-11-10 01:11:00 | [diff] [blame] | 43 | if (c == '%' || c == ';' || c == '#' || c == '?' || |
Xiaohan Wang | 2a6845b | 2022-01-08 04:40:57 | [diff] [blame] | 44 | #if BUILDFLAG(IS_POSIX) || BUILDFLAG(IS_FUCHSIA) |
Joel Hockey | 6f7283f8 | 2020-11-10 01:11:00 | [diff] [blame] | 45 | c == '\\' || |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 46 | #endif |
Joel Hockey | 6f7283f8 | 2020-11-10 01:11:00 | [diff] [blame] | 47 | c <= ' ') { |
Joel Hockey | 6f7283f8 | 2020-11-10 01:11:00 | [diff] [blame] | 48 | url_string += '%'; |
Peter Kasting | abc2bc3 | 2023-10-27 22:30:09 | [diff] [blame] | 49 | base::AppendHexEncodedByte(static_cast<uint8_t>(c), url_string); |
Joel Hockey | 6f7283f8 | 2020-11-10 01:11:00 | [diff] [blame] | 50 | } else { |
| 51 | url_string += c; |
| 52 | } |
| 53 | } |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 54 | |
Peter Kasting | 72c5651 | 2021-01-27 23:45:53 | [diff] [blame] | 55 | return GURL(url_string); |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 56 | } |
| 57 | |
| 58 | bool FileURLToFilePath(const GURL& url, base::FilePath* file_path) { |
| 59 | *file_path = base::FilePath(); |
| 60 | base::FilePath::StringType& file_path_str = |
| 61 | const_cast<base::FilePath::StringType&>(file_path->value()); |
| 62 | file_path_str.clear(); |
| 63 | |
| 64 | if (!url.is_valid()) |
| 65 | return false; |
| 66 | |
Tommy Li | 3187fae | 2019-11-14 20:04:22 | [diff] [blame] | 67 | // We may want to change this to a CHECK in the future. |
| 68 | if (!url.SchemeIsFile()) |
| 69 | return false; |
| 70 | |
Xiaohan Wang | 2a6845b | 2022-01-08 04:40:57 | [diff] [blame] | 71 | #if BUILDFLAG(IS_WIN) |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 72 | std::string path; |
| 73 | std::string host = url.host(); |
| 74 | if (host.empty()) { |
| 75 | // URL contains no host, the path is the filename. In this case, the path |
Chris Mumford | e8d8f33 | 2018-10-10 02:23:31 | [diff] [blame] | 76 | // will probably be preceded with a slash, as in "/C:/foo.txt", so we |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 77 | // trim out that here. |
| 78 | path = url.path(); |
| 79 | size_t first_non_slash = path.find_first_not_of("/\\"); |
| 80 | if (first_non_slash != std::string::npos && first_non_slash > 0) |
| 81 | path.erase(0, first_non_slash); |
| 82 | } else { |
Chris Mumford | e8d8f33 | 2018-10-10 02:23:31 | [diff] [blame] | 83 | // URL contains a host: this means it's UNC. We keep the preceding slash |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 84 | // on the path. |
| 85 | path = "\\\\"; |
| 86 | path.append(host); |
| 87 | path.append(url.path()); |
| 88 | } |
| 89 | std::replace(path.begin(), path.end(), '/', '\\'); |
Xiaohan Wang | 2a6845b | 2022-01-08 04:40:57 | [diff] [blame] | 90 | #else // BUILDFLAG(IS_WIN) |
Tommy Li | 3187fae | 2019-11-14 20:04:22 | [diff] [blame] | 91 | // On POSIX, there's no obvious interpretation of file:// URLs with a host. |
| 92 | // Usually, remote mounts are still mounted onto the local filesystem. |
| 93 | // Therefore, we discard all URLs that are not obviously local to prevent |
| 94 | // spoofing attacks using file:// URLs. See crbug.com/881675. |
| 95 | if (!url.host().empty() && !net::IsLocalhost(url)) { |
| 96 | return false; |
| 97 | } |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 98 | std::string path = url.path(); |
Xiaohan Wang | 2a6845b | 2022-01-08 04:40:57 | [diff] [blame] | 99 | #endif // !BUILDFLAG(IS_WIN) |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 100 | |
| 101 | if (path.empty()) |
| 102 | return false; |
| 103 | |
Matt Giuca | ee5e87f | 2018-06-07 04:18:41 | [diff] [blame] | 104 | // "%2F" ('/') results in failure, because it represents a literal '/' |
| 105 | // character in a path segment (not a path separator). If this were decoded, |
| 106 | // it would be interpreted as a path separator on both POSIX and Windows (note |
| 107 | // that Firefox *does* decode this, but it was decided on |
| 108 | // https://crbug.com/585422 that this represents a potential security risk). |
| 109 | // It isn't correct to keep it as "%2F", so this just fails. This is fine, |
| 110 | // because '/' is not a valid filename character on either POSIX or Windows. |
Hayato Ito | 8caa9bb3 | 2023-07-25 00:34:40 | [diff] [blame] | 111 | // |
| 112 | // A valid URL may include "%00" (NULL) in its path (see |
| 113 | // https://crbug.com/1400251), which is considered an illegal filename and |
| 114 | // results in failure. |
| 115 | std::set<unsigned char> illegal_encoded_bytes{'/', '\0'}; |
Matt Giuca | ee5e87f | 2018-06-07 04:18:41 | [diff] [blame] | 116 | |
Xiaohan Wang | 2a6845b | 2022-01-08 04:40:57 | [diff] [blame] | 117 | #if BUILDFLAG(IS_WIN) |
Matt Giuca | ee5e87f | 2018-06-07 04:18:41 | [diff] [blame] | 118 | // "%5C" ('\\') on Windows results in failure, for the same reason as '/' |
| 119 | // above. On POSIX, "%5C" simply decodes as '\\', a valid filename character. |
| 120 | illegal_encoded_bytes.insert('\\'); |
| 121 | #endif |
| 122 | |
Weilun Shi | 40194033 | 2020-07-14 22:22:33 | [diff] [blame] | 123 | if (base::ContainsEncodedBytes(path, illegal_encoded_bytes)) |
Matt Giuca | ee5e87f | 2018-06-07 04:18:41 | [diff] [blame] | 124 | return false; |
| 125 | |
Ryan Sleevi | a9d6aa6 | 2019-07-26 13:32:18 | [diff] [blame] | 126 | // Unescape all percent-encoded sequences, including blocked-for-display |
Matt Giuca | ee5e87f | 2018-06-07 04:18:41 | [diff] [blame] | 127 | // characters, control characters and invalid UTF-8 byte sequences. |
| 128 | // Percent-encoded bytes are not meaningful in a file system. |
Weilun Shi | 40194033 | 2020-07-14 22:22:33 | [diff] [blame] | 129 | path = base::UnescapeBinaryURLComponent(path); |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 130 | |
Xiaohan Wang | 2a6845b | 2022-01-08 04:40:57 | [diff] [blame] | 131 | #if BUILDFLAG(IS_WIN) |
[email protected] | 52796541 | 2014-05-07 14:38:26 | [diff] [blame] | 132 | if (base::IsStringUTF8(path)) { |
Jan Wilken Dörrie | 9720dce | 2020-07-21 17:14:23 | [diff] [blame] | 133 | file_path_str.assign(base::UTF8ToWide(path)); |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 134 | // We used to try too hard and see if |path| made up entirely of |
| 135 | // the 1st 256 characters in the Unicode was a zero-extended UTF-16. |
| 136 | // If so, we converted it to 'Latin-1' and checked if the result was UTF-8. |
| 137 | // If the check passed, we converted the result to UTF-8. |
| 138 | // Otherwise, we treated the result as the native OS encoding. |
| 139 | // However, that led to http://crbug.com/4619 and http://crbug.com/14153 |
| 140 | } else { |
| 141 | // Not UTF-8, assume encoding is native codepage and we're done. We know we |
| 142 | // are giving the conversion function a nonempty string, and it may fail if |
| 143 | // the given string is not in the current encoding and give us an empty |
| 144 | // string back. We detect this and report failure. |
Jan Wilken Dörrie | 9720dce | 2020-07-21 17:14:23 | [diff] [blame] | 145 | file_path_str = base::SysNativeMBToWide(path); |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 146 | } |
Xiaohan Wang | 2a6845b | 2022-01-08 04:40:57 | [diff] [blame] | 147 | #else // BUILDFLAG(IS_WIN) |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 148 | // Collapse multiple path slashes into a single path slash. |
| 149 | std::string new_path; |
| 150 | do { |
| 151 | new_path = path; |
brettw | e6dae46 | 2015-06-24 20:54:45 | [diff] [blame] | 152 | base::ReplaceSubstringsAfterOffset(&new_path, 0, "//", "/"); |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 153 | path.swap(new_path); |
| 154 | } while (new_path != path); |
| 155 | |
| 156 | file_path_str.assign(path); |
Xiaohan Wang | 2a6845b | 2022-01-08 04:40:57 | [diff] [blame] | 157 | #endif // !BUILDFLAG(IS_WIN) |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 158 | |
| 159 | return !file_path_str.empty(); |
| 160 | } |
| 161 | |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 162 | void GenerateSafeFileName(const std::string& mime_type, |
| 163 | bool ignore_extension, |
| 164 | base::FilePath* file_path) { |
| 165 | // Make sure we get the right file extension |
| 166 | EnsureSafeExtension(mime_type, ignore_extension, file_path); |
| 167 | |
Xiaohan Wang | 2a6845b | 2022-01-08 04:40:57 | [diff] [blame] | 168 | #if BUILDFLAG(IS_WIN) |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 169 | // Prepend "_" to the file name if it's a reserved name |
| 170 | base::FilePath::StringType leaf_name = file_path->BaseName().value(); |
| 171 | DCHECK(!leaf_name.empty()); |
dhnishi | c28dfc3 | 2015-07-08 02:21:31 | [diff] [blame] | 172 | if (IsReservedNameOnWindows(leaf_name)) { |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 173 | leaf_name = base::FilePath::StringType(FILE_PATH_LITERAL("_")) + leaf_name; |
| 174 | *file_path = file_path->DirName(); |
| 175 | if (file_path->value() == base::FilePath::kCurrentDirectory) { |
| 176 | *file_path = base::FilePath(leaf_name); |
| 177 | } else { |
| 178 | *file_path = file_path->Append(leaf_name); |
| 179 | } |
| 180 | } |
| 181 | #endif |
| 182 | } |
| 183 | |
dhnishi | c28dfc3 | 2015-07-08 02:21:31 | [diff] [blame] | 184 | bool IsReservedNameOnWindows(const base::FilePath::StringType& filename) { |
| 185 | // This list is taken from the MSDN article "Naming a file" |
| 186 | // http://msdn2.microsoft.com/en-us/library/aa365247(VS.85).aspx |
| 187 | // I also added clock$ because GetSaveFileName seems to consider it as a |
| 188 | // reserved name too. |
| 189 | static const char* const known_devices[] = { |
| 190 | "con", "prn", "aux", "nul", "com1", "com2", "com3", "com4", |
| 191 | "com5", "com6", "com7", "com8", "com9", "lpt1", "lpt2", "lpt3", |
| 192 | "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "clock$"}; |
Xiaohan Wang | 2a6845b | 2022-01-08 04:40:57 | [diff] [blame] | 193 | #if BUILDFLAG(IS_WIN) |
Jan Wilken Dörrie | 9720dce | 2020-07-21 17:14:23 | [diff] [blame] | 194 | std::string filename_lower = base::ToLowerASCII(base::WideToUTF8(filename)); |
Xiaohan Wang | 2a6845b | 2022-01-08 04:40:57 | [diff] [blame] | 195 | #elif BUILDFLAG(IS_POSIX) || BUILDFLAG(IS_FUCHSIA) |
brettw | 8e2106d | 2015-08-11 19:30:22 | [diff] [blame] | 196 | std::string filename_lower = base::ToLowerASCII(filename); |
dhnishi | c28dfc3 | 2015-07-08 02:21:31 | [diff] [blame] | 197 | #endif |
| 198 | |
Ryan Sleevi | 4625214fa | 2018-05-10 16:42:45 | [diff] [blame] | 199 | for (const char* const device : known_devices) { |
David Benjamin | 83e81bc | 2023-11-14 16:04:01 | [diff] [blame] | 200 | // Check for an exact match, or a "DEVICE." prefix. |
| 201 | size_t len = strlen(device); |
| 202 | if (filename_lower.starts_with(device) && |
| 203 | (filename_lower.size() == len || filename_lower[len] == '.')) { |
dhnishi | c28dfc3 | 2015-07-08 02:21:31 | [diff] [blame] | 204 | return true; |
markusheintz | 74e10b2 | 2016-07-08 13:19:36 | [diff] [blame] | 205 | } |
dhnishi | c28dfc3 | 2015-07-08 02:21:31 | [diff] [blame] | 206 | } |
| 207 | |
| 208 | static const char* const magic_names[] = { |
| 209 | // These file names are used by the "Customize folder" feature of the |
| 210 | // shell. |
| 211 | "desktop.ini", |
| 212 | "thumbs.db", |
| 213 | }; |
| 214 | |
Ryan Sleevi | 4625214fa | 2018-05-10 16:42:45 | [diff] [blame] | 215 | for (const char* const magic_name : magic_names) { |
| 216 | if (filename_lower == magic_name) |
dhnishi | c28dfc3 | 2015-07-08 02:21:31 | [diff] [blame] | 217 | return true; |
| 218 | } |
| 219 | |
| 220 | return false; |
| 221 | } |
| 222 | |
[email protected] | d96cf75 | 2014-04-09 04:05:28 | [diff] [blame] | 223 | } // namespace net |