解析 data URI,获取 MIME type 和原始数据(raw data)

data:text/html;base64,PCFET0NUWVBFIGh0bWw+DQo8aHRtDQo=
原始数据格式

data:text/html;base64,PCFET0NUWVBFIGh0bWw+DQo8aHRtbCBjbGFzcz0ibG9hZGluZyI+DQogIDxoZWFkPg0KICAgIDx0aXRsZT5jYW52YXNraXQgQVBJIOa1i+ivlTwvdGl0bGU+DQogICAgPG1ldGEgY2hhcnNldD0idXRmLTgiIC8+DQogIDwvaGVhZD4NCiAgPGJvZHkgc3R5bGU9ImZvbnQtZmFtaWx5OiBNaWNyb3NvZnQgWWFIZWkiPg0KICAgIDxjYW52YXMgaWQ9ImNvbnRlbnQiIHdpZHRoPSI0MDAwIiBoZWlnaHQ9IjQwMDAiPjwvY2FudmFzPg0KICAgIDxzY3JpcHQgdHlwZT0idGV4dC9qYXZhc2NyaXB0IiBzcmM9Ii4vYXBwL2Jpbi90ZXN0L2NhbnZhc2tpdC5qcyI+PC9zY3JpcHQ+DQogICAgPHNjcmlwdD4NCiAgICAgIGNvbnN0IGNrTG9hZGVkID0gQ2FudmFzS2l0SW5pdCh7DQogICAgICAgIGxvY2F0ZUZpbGU6IChmaWxlKSA9PiAiLi9hcHAvYmluL3Rlc3QvIiArIGZpbGUsDQogICAgICB9KTsNCg0KICAgICAgUHJvbWlzZS5hbGwoW2NrTG9hZGVkXSkudGhlbigoW0NhbnZhc0tpdF0pID0+IHsNCiAgICAgICAgY29uc3Qgc3VyZmFjZSA9IENhbnZhc0tpdC5NYWtlQ2FudmFzU3VyZmFjZSgiY29udGVudCIpOw0KICAgICAgICBpZiAoIXN1cmZhY2UpIHsNCiAgICAgICAgICBjb25zb2xlLmVycm9yKCJDb3VsZCBub3QgbWFrZSBzdXJmYWNlIik7DQogICAgICAgICAgcmV0dXJuOw0KICAgICAgICB9DQoNCiAgICAgICAgY29uc3QgY2FudmFzID0gc3VyZmFjZS5nZXRDYW52YXMoKTsNCg0KICAgICAgICB2YXIgb2xkX2RhdGEgPSBbMHgwMSwgMHgwMiwgMHgwM107DQogICAgICAgIHZhciBwYXRjaCA9IFsNCiAgICAgICAgICAweDQ3LCAweDQyLCAweDUzLCAweDQ0LCAweDQ5LCAweDQ2LCAweDM0LCAweDMyLCAweDAzLCAweGUyLCAweGZmLA0KICAgICAgICAgIDB4OGQsIDB4ZDIsIDB4MGEsIDB4MDMsIDB4ZWEsIDB4OWEsIDB4Y2EsIDB4MDksIDB4MGEsIDB4MDEsIDB4MDEsDQogICAgICAgICAgMHgwMSwgMHgwMSwgMHgwMCwgMHgwMiwgMHgwMCwgMHgwMCwgMHgwMCwgMHgwMCwgMHgwMSwgMHgwMiwgMHgwMSwNCiAgICAgICAgICAweDAxLCAweDA0LCAweDAzLA0KICAgICAgICBdOw0KICAgICAgICBwaWMgPSBDYW52YXNLaXQuQXBwbHlQYXRjaChvbGRfZGF0YSxwYXRjaCk7DQoNCiAgICAgICAgcGljID0gQ2FudmFzS2l0Lk1ha2VQaWN0dXJlKG9sZF9kYXRhKTsNCg0KICAgICAgICB7DQogICAgICAgICAgY2FudmFzLmRyYXdQaWN0dXJlKHBpYyk7DQogICAgICAgICAgc3VyZmFjZS5mbHVzaCgpOw0KICAgICAgICAgIHN1cmZhY2UuZmx1c2goKTsNCiAgICAgICAgfQ0KDQogICAgICAgIHN1cmZhY2UuZmx1c2goKTsNCiAgICAgIH0pOw0KICAgIDwvc2NyaXB0Pg0KICA8L2JvZHk+DQo8L2h0bWw+DQo=
解析方法内部实现参考:
// On success returns a pair of <mime_type, data>.
// On error returns a pair of <string, nullptr>.
// mime_type should be ignored if data is nullptr.
std::pair<std::string, scoped_refptr<base::RefCountedString>>
ParseEncodedImageData(const std::string& encoded_image_data) {
std::pair<std::string, scoped_refptr<base::RefCountedString>> result;
GURL encoded_image_uri(encoded_image_data);
if (!encoded_image_uri.is_valid() ||
!encoded_image_uri.SchemeIs(url::kDataScheme)) {
return result;
}
std::string content = encoded_image_uri.GetContent();
// The content should look like this: "image/png;base64,aaa..." (where
// "aaa..." is the base64-encoded image data).
size_t mime_type_end = content.find_first_of(';');
if (mime_type_end == std::string::npos)
return result;
std::string mime_type = content.substr(0, mime_type_end);
size_t base64_begin = mime_type_end + 1;
size_t base64_end = content.find_first_of(',', base64_begin);
if (base64_end == std::string::npos)
return result;
auto base64 = base::MakeStringPiece(content.begin() + base64_begin,
content.begin() + base64_end);
if (base64 != "base64")
return result;
size_t data_begin = base64_end + 1;
auto data =
base::MakeStringPiece(content.begin() + data_begin, content.end());
std::string decoded_data;
if (!base::Base64Decode(data, &decoded_data))
return result;
result.first = mime_type;
result.second =
base::MakeRefCounted<base::RefCountedString>(std::move(decoded_data));
return result;
}
D:\chromium110\chromium\src\net\base\data_url.cc :
// Parses a data: URL into its media type, charset and (optionally) its decoded
// payload.
//
// |url|       - URL to parse. Must be valid and have a scheme, otherwise false
//               is returned.
// |mime_type| - out-param; must be non-null and empty on entry (DCHECK).
//               Receives the lower-cased media type, or "text/plain" when the
//               media type is missing or is rejected by
//               ParseMimeTypeWithoutParameter().
// |charset|   - out-param; must be non-null and empty on entry (DCHECK).
//               Receives the value of the first "charset=" parameter. It is
//               set to "US-ASCII" when the media type is missing and no
//               charset was given, and forced to "US-ASCII" when the media
//               type is invalid.
// |data|      - optional out-param (may be null when the caller does not need
//               the payload); if non-null it must be empty on entry (DCHECK).
//               Receives the base64-decoded or URL-unescaped body.
//
// Returns false when the URL is invalid, the content has no ',', the charset
// value is not a token, or base64 decoding fails. |mime_type| and |charset|
// are only assigned on the success path at the end of the function.
bool DataURL::Parse(const GURL& url,
std::string* mime_type,
std::string* charset,
std::string* data) {
if (!url.is_valid() || !url.has_scheme())
return false;
DCHECK(mime_type->empty());
DCHECK(charset->empty());
DCHECK(!data || data->empty());
// |content| is a view; when the optimization is disabled it points into
// |content_string|, which therefore has to outlive it.
base::StringPiece content;
std::string content_string;
if (base::FeatureList::IsEnabled(base::features::kOptimizeDataUrls)) {
// Avoid copying the URL content which can be expensive for large URLs.
content = url.GetContentPiece();
} else {
content_string = url.GetContent();
content = content_string;
}
// The first ',' separates the metadata (media type and parameters) from the
// payload; a data URL without one is rejected.
base::StringPiece::const_iterator comma = base::ranges::find(content, ',');
if (comma == content.end())
return false;
// Split the metadata on ';' and trim whitespace around each field.
std::vector<base::StringPiece> meta_data =
base::SplitStringPiece(base::MakeStringPiece(content.begin(), comma), ";",
base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
// These are moved to |mime_type| and |charset| on success.
std::string mime_type_value;
std::string charset_value;
// The first field, if present, is the media type.
auto iter = meta_data.cbegin();
if (iter != meta_data.cend()) {
mime_type_value = base::ToLowerASCII(*iter);
++iter;
}
static constexpr base::StringPiece kBase64Tag("base64");
static constexpr base::StringPiece kCharsetTag("charset=");
bool base64_encoded = false;
// Scan the remaining fields. Only the first "base64" tag and the first
// "charset=" parameter are honoured; every other field is ignored.
for (; iter != meta_data.cend(); ++iter) {
if (!base64_encoded &&
base::EqualsCaseInsensitiveASCII(*iter, kBase64Tag)) {
base64_encoded = true;
} else if (charset_value.empty() &&
base::StartsWith(*iter, kCharsetTag,
base::CompareCase::INSENSITIVE_ASCII)) {
charset_value = std::string(iter->substr(kCharsetTag.size()));
// The grammar for charset is not specially defined in RFC2045 and
// RFC2397. It just needs to be a token.
if (!HttpUtil::IsToken(charset_value))
return false;
}
}
if (mime_type_value.empty()) {
// Fallback to the default if nothing specified in the mediatype part as
// specified in RFC2045. As specified in RFC2397, we use |charset| even if
// |mime_type| is empty.
mime_type_value = "text/plain";
if (charset_value.empty())
charset_value = "US-ASCII";
} else if (!ParseMimeTypeWithoutParameter(mime_type_value, nullptr,
nullptr)) {
// Fallback to the default as recommended in RFC2045 when the mediatype
// value is invalid. For this case, we don't respect |charset| but force it
// set to "US-ASCII".
mime_type_value = "text/plain";
charset_value = "US-ASCII";
}
// The caller may not be interested in receiving the data.
if (data) {
// Preserve spaces if dealing with text or xml input, same as mozilla:
// https://bugzilla.mozilla.org/show_bug.cgi?id=138052
// but strip them otherwise:
// https://bugzilla.mozilla.org/show_bug.cgi?id=37200
// (Spaces in a data URL should be escaped, which is handled below, so any
// spaces now are wrong. People expect to be able to enter them in the URL
// bar for text, and it can't hurt, so we allow it.)
//
// TODO(mmenke): Is removing all spaces reasonable? GURL removes trailing
// spaces itself, anyways. Should we just trim leading spaces instead?
// Allowing random intermediary spaces seems unnecessary.
auto raw_body = base::MakeStringPiece(comma + 1, content.end());
// For base64, we may have url-escaped whitespace which is not part
// of the data, and should be stripped. Otherwise, the escaped whitespace
// could be part of the payload, so don't strip it.
if (base64_encoded) {
// If the data URL is well formed, we can decode it immediately.
if (base::FeatureList::IsEnabled(base::features::kOptimizeDataUrls) &&
IsDataURLReadyForDecode(raw_body)) {
if (!base::Base64Decode(raw_body, data))
return false;
} else {
// Slow path: URL-unescape the body first, then decode it with the
// forgiving base64 policy.
std::string unescaped_body = base::UnescapeBinaryURLComponent(raw_body);
if (!base::Base64Decode(unescaped_body, data,
base::Base64DecodePolicy::kForgiving))
return false;
}
} else {
// Strip whitespace for non-text MIME types.
// |temp| owns the stripped copy; |raw_body| is re-pointed at it, so |temp|
// must stay alive until the unescape call below.
std::string temp;
if (!(mime_type_value.compare(0, 5, "text/") == 0 ||
mime_type_value.find("xml") != std::string::npos)) {
temp = std::string(raw_body);
base::EraseIf(temp, base::IsAsciiWhitespace<char>);
raw_body = temp;
}
*data = base::UnescapeBinaryURLComponent(raw_body);
}
}
// Success: publish the media type and charset to the caller.
*mime_type = std::move(mime_type_value);
*charset = std::move(charset_value);
return true;
}
解析方法2:
D:\chromium110\chromium\src\third_party\blink\renderer\platform\network\network_utils.cc
D:\chromium110\chromium\src\net\base\data_url_unittest.cc
#include "net/base/data_url.h"
#include "url/gurl.h"
#include "net/base/net_errors.h"
#include "net/http/http_response_headers.h"
// Usage example: parse |url_| with net::DataURL::Parse() and, on success,
// move the decoded payload into a heap-allocated |response_body|. When parsing
// fails, |response_body| stays null.
std::string mime_type, charset, data;
std::unique_ptr<std::string> response_body;
if (net::DataURL::Parse(url_, &mime_type, &charset, &data))
response_body = std::make_unique<std::string>(std::move(data));
//////////////////////////////////////////
// One row of the table-driven DataURL::Parse() test: an input URL together
// with the results the test expects from parsing it.
struct ParseTestData {
// Input URL string handed to GURL / DataURL::Parse().
const char* url;
// Expected return value of DataURL::Parse().
bool is_valid;
// Expected MIME type output.
const char* mime_type;
// Expected charset output.
const char* charset;
// Expected payload output.
const std::string data;
};
} // namespace
// Table-driven test for DataURL::Parse(): each row supplies a URL and the
// return value, MIME type, charset and payload expected from parsing it.
// Only two representative rows are listed in this excerpt.
TEST(DataURLTest, Parse) {
  const ParseTestData tests[] = {
      {"data:", false, "", "", ""},
      {"data:,", true, "text/plain", "US-ASCII", ""},
  };

  for (const auto& test : tests) {
    // Attach the URL to any failure message produced inside this iteration.
    SCOPED_TRACE(test.url);

    // Parse() requires empty output strings, so use fresh ones per row.
    std::string mime_type;
    std::string charset;
    std::string data;
    bool ok = DataURL::Parse(GURL(test.url), &mime_type, &charset, &data);
    EXPECT_EQ(ok, test.is_valid);
    EXPECT_EQ(test.mime_type, mime_type);
    EXPECT_EQ(test.charset, charset);
    EXPECT_EQ(test.data, data);
  }
}
////////////////////////////////////////////////////////////
// Fuzzer example: build a GURL from the remaining fuzz input, try to parse it
// as a data URL, and feed the resulting MIME type, charset and data into a
// static navigation response.
GURL url(provider.ConsumeRemainingBytesAsString());
std::string mime_type, charset, data;
if (!net::DataURL::Parse(url, &mime_type, &charset, &data)) {
// This case is only here to allow cluster fuzz pass any url,
// to unblock further fuzzing.
mime_type = "text/html";
charset = "UTF-8";
}
blink::WebNavigationParams::FillStaticResponse(
navigation_params.get(), blink::WebString::FromUTF8(mime_type),
blink::WebString::FromUTF8(charset), data);
////////////////////////////////////////
// Test a slightly larger data URL.
// Builds a full response for a base64-encoded PNG data URL through
// DataURL::BuildResponse() and checks the payload size, MIME type, charset,
// status line and Content-Type header it produces.
TEST(DataURLTest, Image) {
// Use our nice little Chrome logo.
GURL image_url(
"data:image/png;base64,"
"iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAADVklEQVQ4jX2TfUwUB"
"BjG3w1y+HGcd9dxhXR8T4awOccJGgOSWclHImznLkTlSw0DDQXkrmgYgbUYnlQTqQ"
"xIEVxitD5UMCATRA1CEEg+Qjw3bWDxIauJv/5oumqs39/P827vnucRmYN0gyF01GI"
"5MpCVdW0gO7tvNC+vqSEtbZefk5NuLv1jdJ46p/zw0HeH4+PHr3h7c1mjoV2t5rKz"
"Mx1+fg9bAgK6zHq9cU5z+LpA3xOtx34+vTeT21onRuzssC3zxbbSwC13d/pFuC7Ck"
"IMDxQpF7r/MWq12UctI1dWWm99ypqSYmRUBdKem8MkrO/kgaTt1O7YzlpzE5GIVd0"
"WYUqt57yWf2McHTObYPbVD+ZwbtlLTVMZ3BW+TnLyXLaWtmEq6WJVbT3HBh3Svj2H"
"QQcm43XwmtoYM6vVKleh0uoWvnzW3v3MpidruPTQPf0bia7sJOtBM0ufTWNvus/nk"
"DFHF9ZS+uYVjRUasMeHUmyLYtcklTvzWGFZnNOXczThvpKIzjcahSqIzkvDLayDq6"
"D3eOjtBbNUEIZYyqsvj4V4wY92eNJ4IoyhTbxXX1T5xsV9tm9r4TQwHLiZw/pdDZJ"
"ea8TKmsmR/K0uLh/GwnCHghTja6lPhphezPfO5/5MrVvMzNaI3+ERHfrFzPKQukrQ"
"GI4d/3EFD/3E2mVNYvi4at7CXWREaxZGD+3hg28zD3gVMd6q5c8GdosynKmSeRuGz"
"pjyl1/9UDGtPR5HeaKT8Wjo17WXk579BXVUhN64ehF9fhRtq/uxxZKzNiZFGD0wRC"
"3NFROZ5mwIPL/96K/rKMMLrIzF9uhHr+/sYH7DAbwlgC4J+R2Z7FUx1qLnV7MGF40"
"smVSoJ/jvHRfYhQeUJd/SnYtGWhPHR0Sz+GE2F2yth0B36Vcz2KpnufBJbsysjjW4"
"kblBUiIjiURUWqJY65zxbnTy57GQyH58zgy0QBtTQv5gH15XMdKkYu+TGaJMnlm2O"
"34uI4b9tflqp1+QEFGzoW/ulmcofcpkZCYJhDfSpme7QcrHa+Xfji8paEQkTkSfmm"
"oRWRNZr/F1KfVMjW+IKEnv2FwZfKdzt0BQR6lClcZR0EfEXEfv/G6W9iLiIyCoReV"
"5EnhORIBHx+ufPj/gLB/zGI/G4Bk0AAAAASUVORK5CYII=");
std::string mime_type;
std::string charset;
std::string data;
scoped_refptr<HttpResponseHeaders> headers;
// The call must report OK and fill every out-parameter.
EXPECT_EQ(OK, DataURL::BuildResponse(image_url, "GET", &mime_type, &charset,
&data, &headers));
// Expected size, in bytes, of |data| as filled in by BuildResponse().
EXPECT_EQ(911u, data.size());
EXPECT_EQ("image/png", mime_type);
// The URL carries no charset parameter, so none is expected back.
EXPECT_TRUE(charset.empty());
// ASSERT (not EXPECT): |headers| is dereferenced below.
ASSERT_TRUE(headers);
std::string value;
EXPECT_EQ(headers->GetStatusLine(), "HTTP/1.1 200 OK");
EXPECT_TRUE(headers->GetNormalizedHeader("Content-Type", &value));
EXPECT_EQ(value, "image/png");
}
生成方法:
// Builds a data: URI of the form "data:image/<mime_subtype>;base64,<payload>"
// where <payload> is the base64 encoding of |image_data|.
//
// |image_data|   - raw image bytes to embed.
// |mime_subtype| - the part after "image/" in the MIME type; it is appended
//                  verbatim, without validation or escaping.
std::string MakeDataURIForImage(base::span<const uint8_t> image_data,
                                base::StringPiece mime_subtype) {
  std::string data_uri("data:image/");
  data_uri.append(mime_subtype.begin(), mime_subtype.end());
  data_uri.append(";base64,");
  data_uri.append(base::Base64Encode(image_data));
  return data_uri;
}