解析 data URI,获取其中的 MIME type 和原始数据(raw data)

data:text/html;base64,PCFET0NUWVBFIGh0bWw+DQo8aHRtDQo=
原始数据格式
 data:text/html;base64,PCFET0NUWVBFIGh0bWw+DQo8aHRtbCBjbGFzcz0ibG9hZGluZyI+DQogIDxoZWFkPg0KICAgIDx0aXRsZT5jYW52YXNraXQgQVBJIOa1i+ivlTwvdGl0bGU+DQogICAgPG1ldGEgY2hhcnNldD0idXRmLTgiIC8+DQogIDwvaGVhZD4NCiAgPGJvZHkgc3R5bGU9ImZvbnQtZmFtaWx5OiBNaWNyb3NvZnQgWWFIZWkiPg0KICAgIDxjYW52YXMgaWQ9ImNvbnRlbnQiIHdpZHRoPSI0MDAwIiBoZWlnaHQ9IjQwMDAiPjwvY2FudmFzPg0KICAgIDxzY3JpcHQgdHlwZT0idGV4dC9qYXZhc2NyaXB0IiBzcmM9Ii4vYXBwL2Jpbi90ZXN0L2NhbnZhc2tpdC5qcyI+PC9zY3JpcHQ+DQogICAgPHNjcmlwdD4NCiAgICAgIGNvbnN0IGNrTG9hZGVkID0gQ2FudmFzS2l0SW5pdCh7DQogICAgICAgIGxvY2F0ZUZpbGU6IChmaWxlKSA9PiAiLi9hcHAvYmluL3Rlc3QvIiArIGZpbGUsDQogICAgICB9KTsNCg0KICAgICAgUHJvbWlzZS5hbGwoW2NrTG9hZGVkXSkudGhlbigoW0NhbnZhc0tpdF0pID0+IHsNCiAgICAgICAgY29uc3Qgc3VyZmFjZSA9IENhbnZhc0tpdC5NYWtlQ2FudmFzU3VyZmFjZSgiY29udGVudCIpOw0KICAgICAgICBpZiAoIXN1cmZhY2UpIHsNCiAgICAgICAgICBjb25zb2xlLmVycm9yKCJDb3VsZCBub3QgbWFrZSBzdXJmYWNlIik7DQogICAgICAgICAgcmV0dXJuOw0KICAgICAgICB9DQoNCiAgICAgICAgY29uc3QgY2FudmFzID0gc3VyZmFjZS5nZXRDYW52YXMoKTsNCg0KICAgICAgICB2YXIgb2xkX2RhdGEgPSBbMHgwMSwgMHgwMiwgMHgwM107DQogICAgICAgIHZhciBwYXRjaCA9IFsNCiAgICAgICAgICAweDQ3LCAweDQyLCAweDUzLCAweDQ0LCAweDQ5LCAweDQ2LCAweDM0LCAweDMyLCAweDAzLCAweGUyLCAweGZmLA0KICAgICAgICAgIDB4OGQsIDB4ZDIsIDB4MGEsIDB4MDMsIDB4ZWEsIDB4OWEsIDB4Y2EsIDB4MDksIDB4MGEsIDB4MDEsIDB4MDEsDQogICAgICAgICAgMHgwMSwgMHgwMSwgMHgwMCwgMHgwMiwgMHgwMCwgMHgwMCwgMHgwMCwgMHgwMCwgMHgwMSwgMHgwMiwgMHgwMSwNCiAgICAgICAgICAweDAxLCAweDA0LCAweDAzLA0KICAgICAgICBdOw0KICAgICAgICBwaWMgPSBDYW52YXNLaXQuQXBwbHlQYXRjaChvbGRfZGF0YSxwYXRjaCk7DQoNCiAgICAgICAgcGljID0gQ2FudmFzS2l0Lk1ha2VQaWN0dXJlKG9sZF9kYXRhKTsNCg0KICAgICAgICB7DQogICAgICAgICAgY2FudmFzLmRyYXdQaWN0dXJlKHBpYyk7DQogICAgICAgICAgc3VyZmFjZS5mbHVzaCgpOw0KICAgICAgICAgIHN1cmZhY2UuZmx1c2goKTsNCiAgICAgICAgfQ0KDQogICAgICAgIHN1cmZhY2UuZmx1c2goKTsNCiAgICAgIH0pOw0KICAgIDwvc2NyaXB0Pg0KICA8L2JvZHk+DQo8L2h0bWw+DQo=
解析方法内部实现参考:
// On success returns a pair of <mime_type, data>. // On error returns a pair of <string, nullptr>. // mime_type should be ignored if data is nullptr. std::pair<std::string, scoped_refptr<base::RefCountedString>> ParseEncodedImageData(const std::string& encoded_image_data) { std::pair<std::string, scoped_refptr<base::RefCountedString>> result; GURL encoded_image_uri(encoded_image_data); if (!encoded_image_uri.is_valid() || !encoded_image_uri.SchemeIs(url::kDataScheme)) { return result; } std::string content = encoded_image_uri.GetContent(); // The content should look like this: "image/png;base64,aaa..." (where // "aaa..." is the base64-encoded image data). size_t mime_type_end = content.find_first_of(';'); if (mime_type_end == std::string::npos) return result; std::string mime_type = content.substr(0, mime_type_end); size_t base64_begin = mime_type_end + 1; size_t base64_end = content.find_first_of(',', base64_begin); if (base64_end == std::string::npos) return result; auto base64 = base::MakeStringPiece(content.begin() + base64_begin, content.begin() + base64_end); if (base64 != "base64") return result; size_t data_begin = base64_end + 1; auto data = base::MakeStringPiece(content.begin() + data_begin, content.end()); std::string decoded_data; if (!base::Base64Decode(data, &decoded_data)) return result; result.first = mime_type; result.second = base::MakeRefCounted<base::RefCountedString>(std::move(decoded_data)); return result; }
D:\chromium110\chromium\src\net\base\data_url.cc:
bool DataURL::Parse(const GURL& url, std::string* mime_type, std::string* charset, std::string* data) { if (!url.is_valid() || !url.has_scheme()) return false; DCHECK(mime_type->empty()); DCHECK(charset->empty()); DCHECK(!data || data->empty()); base::StringPiece content; std::string content_string; if (base::FeatureList::IsEnabled(base::features::kOptimizeDataUrls)) { // Avoid copying the URL content which can be expensive for large URLs. content = url.GetContentPiece(); } else { content_string = url.GetContent(); content = content_string; } base::StringPiece::const_iterator comma = base::ranges::find(content, ','); if (comma == content.end()) return false; std::vector<base::StringPiece> meta_data = base::SplitStringPiece(base::MakeStringPiece(content.begin(), comma), ";", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); // These are moved to |mime_type| and |charset| on success. std::string mime_type_value; std::string charset_value; auto iter = meta_data.cbegin(); if (iter != meta_data.cend()) { mime_type_value = base::ToLowerASCII(*iter); ++iter; } static constexpr base::StringPiece kBase64Tag("base64"); static constexpr base::StringPiece kCharsetTag("charset="); bool base64_encoded = false; for (; iter != meta_data.cend(); ++iter) { if (!base64_encoded && base::EqualsCaseInsensitiveASCII(*iter, kBase64Tag)) { base64_encoded = true; } else if (charset_value.empty() && base::StartsWith(*iter, kCharsetTag, base::CompareCase::INSENSITIVE_ASCII)) { charset_value = std::string(iter->substr(kCharsetTag.size())); // The grammar for charset is not specially defined in RFC2045 and // RFC2397. It just needs to be a token. if (!HttpUtil::IsToken(charset_value)) return false; } } if (mime_type_value.empty()) { // Fallback to the default if nothing specified in the mediatype part as // specified in RFC2045. As specified in RFC2397, we use |charset| even if // |mime_type| is empty. 
mime_type_value = "text/plain"; if (charset_value.empty()) charset_value = "US-ASCII"; } else if (!ParseMimeTypeWithoutParameter(mime_type_value, nullptr, nullptr)) { // Fallback to the default as recommended in RFC2045 when the mediatype // value is invalid. For this case, we don't respect |charset| but force it // set to "US-ASCII". mime_type_value = "text/plain"; charset_value = "US-ASCII"; } // The caller may not be interested in receiving the data. if (data) { // Preserve spaces if dealing with text or xml input, same as mozilla: // https://bugzilla.mozilla.org/show_bug.cgi?id=138052 // but strip them otherwise: // https://bugzilla.mozilla.org/show_bug.cgi?id=37200 // (Spaces in a data URL should be escaped, which is handled below, so any // spaces now are wrong. People expect to be able to enter them in the URL // bar for text, and it can't hurt, so we allow it.) // // TODO(mmenke): Is removing all spaces reasonable? GURL removes trailing // spaces itself, anyways. Should we just trim leading spaces instead? // Allowing random intermediary spaces seems unnecessary. auto raw_body = base::MakeStringPiece(comma + 1, content.end()); // For base64, we may have url-escaped whitespace which is not part // of the data, and should be stripped. Otherwise, the escaped whitespace // could be part of the payload, so don't strip it. if (base64_encoded) { // If the data URL is well formed, we can decode it immediately. if (base::FeatureList::IsEnabled(base::features::kOptimizeDataUrls) && IsDataURLReadyForDecode(raw_body)) { if (!base::Base64Decode(raw_body, data)) return false; } else { std::string unescaped_body = base::UnescapeBinaryURLComponent(raw_body); if (!base::Base64Decode(unescaped_body, data, base::Base64DecodePolicy::kForgiving)) return false; } } else { // Strip whitespace for non-text MIME types. 
std::string temp; if (!(mime_type_value.compare(0, 5, "text/") == 0 || mime_type_value.find("xml") != std::string::npos)) { temp = std::string(raw_body); base::EraseIf(temp, base::IsAsciiWhitespace<char>); raw_body = temp; } *data = base::UnescapeBinaryURLComponent(raw_body); } } *mime_type = std::move(mime_type_value); *charset = std::move(charset_value); return true; }
解析方法2:
D:\chromium110\chromium\src\third_party\blink\renderer\platform\network\network_utils.cc
D:\chromium110\chromium\src\net\base\data_url_unittest.cc
#include "net/base/data_url.h"
#include "url/gurl.h"
#include "net/base/net_errors.h"
#include "net/http/http_response_headers.h"

// ---------------------------------------------------------------------------
// Excerpt: calling net::DataURL::Parse and keeping the decoded body only when
// parsing succeeds. |url_| is declared outside this excerpt.
// ---------------------------------------------------------------------------
std::string mime_type, charset, data;
std::unique_ptr<std::string> response_body;
if (net::DataURL::Parse(url_, &mime_type, &charset, &data))
  response_body = std::make_unique<std::string>(std::move(data));

// ---------------------------------------------------------------------------
// Excerpt: table-driven unit test for DataURL::Parse. Only two rows of the
// test table are shown here.
// ---------------------------------------------------------------------------
namespace {

// One test case: the input URL and the expected result of parsing it.
struct ParseTestData {
  const char* url;
  bool is_valid;
  const char* mime_type;
  const char* charset;
  const std::string data;
};

}  // namespace

TEST(DataURLTest, Parse) {
  const ParseTestData tests[] = {
      {"data:", false, "", "", ""},

      {"data:,", true, "text/plain", "US-ASCII", ""},
  };

  for (const auto& test : tests) {
    SCOPED_TRACE(test.url);

    std::string mime_type;
    std::string charset;
    std::string data;
    bool ok = DataURL::Parse(GURL(test.url), &mime_type, &charset, &data);
    EXPECT_EQ(ok, test.is_valid);
    EXPECT_EQ(test.mime_type, mime_type);
    EXPECT_EQ(test.charset, charset);
    EXPECT_EQ(test.data, data);
  }
}

// ---------------------------------------------------------------------------
// Excerpt: fuzzer-side usage. |provider| and |navigation_params| are declared
// outside this excerpt.
// ---------------------------------------------------------------------------
GURL url(provider.ConsumeRemainingBytesAsString());
std::string mime_type, charset, data;
if (!net::DataURL::Parse(url, &mime_type, &charset, &data)) {
  // This case is only here to allow cluster fuzz pass any url,
  // to unblock further fuzzing.
  mime_type = "text/html";
  charset = "UTF-8";
}
blink::WebNavigationParams::FillStaticResponse(
    navigation_params.get(), blink::WebString::FromUTF8(mime_type),
    blink::WebString::FromUTF8(charset), data);

// ---------------------------------------------------------------------------
// Excerpt: building a full response from a base64 image data URL.
// ---------------------------------------------------------------------------

// Test a slightly larger data URL.
TEST(DataURLTest, Image) {
  // Use our nice little Chrome logo.
  GURL image_url(
      "data:image/png;base64,"
      "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAADVklEQVQ4jX2TfUwUB"
      "BjG3w1y+HGcd9dxhXR8T4awOccJGgOSWclHImznLkTlSw0DDQXkrmgYgbUYnlQTqQ"
      "xIEVxitD5UMCATRA1CEEg+Qjw3bWDxIauJv/5oumqs39/P827vnucRmYN0gyF01GI"
      "5MpCVdW0gO7tvNC+vqSEtbZefk5NuLv1jdJ46p/zw0HeH4+PHr3h7c1mjoV2t5rKz"
      "Mx1+fg9bAgK6zHq9cU5z+LpA3xOtx34+vTeT21onRuzssC3zxbbSwC13d/pFuC7Ck"
      "IMDxQpF7r/MWq12UctI1dWWm99ypqSYmRUBdKem8MkrO/kgaTt1O7YzlpzE5GIVd0"
      "WYUqt57yWf2McHTObYPbVD+ZwbtlLTVMZ3BW+TnLyXLaWtmEq6WJVbT3HBh3Svj2H"
      "QQcm43XwmtoYM6vVKleh0uoWvnzW3v3MpidruPTQPf0bia7sJOtBM0ufTWNvus/nk"
      "DFHF9ZS+uYVjRUasMeHUmyLYtcklTvzWGFZnNOXczThvpKIzjcahSqIzkvDLayDq6"
      "D3eOjtBbNUEIZYyqsvj4V4wY92eNJ4IoyhTbxXX1T5xsV9tm9r4TQwHLiZw/pdDZJ"
      "ea8TKmsmR/K0uLh/GwnCHghTja6lPhphezPfO5/5MrVvMzNaI3+ERHfrFzPKQukrQ"
      "GI4d/3EFD/3E2mVNYvi4at7CXWREaxZGD+3hg28zD3gVMd6q5c8GdosynKmSeRuGz"
      "pjyl1/9UDGtPR5HeaKT8Wjo17WXk579BXVUhN64ehF9fhRtq/uxxZKzNiZFGD0wRC"
      "3NFROZ5mwIPL/96K/rKMMLrIzF9uhHr+/sYH7DAbwlgC4J+R2Z7FUx1qLnV7MGF40"
      "smVSoJ/jvHRfYhQeUJd/SnYtGWhPHR0Sz+GE2F2yth0B36Vcz2KpnufBJbsysjjW4"
      "kblBUiIjiURUWqJY65zxbnTy57GQyH58zgy0QBtTQv5gH15XMdKkYu+TGaJMnlm2O"
      "34uI4b9tflqp1+QEFGzoW/ulmcofcpkZCYJhDfSpme7QcrHa+Xfji8paEQkTkSfmm"
      "oRWRNZr/F1KfVMjW+IKEnv2FwZfKdzt0BQR6lClcZR0EfEXEfv/G6W9iLiIyCoReV"
      "5EnhORIBHx+ufPj/gLB/zGI/G4Bk0AAAAASUVORK5CYII=");

  std::string mime_type;
  std::string charset;
  std::string data;
  scoped_refptr<HttpResponseHeaders> headers;

  EXPECT_EQ(OK, DataURL::BuildResponse(image_url, "GET", &mime_type, &charset,
                                       &data, &headers));

  // The decoded PNG payload is expected to be 911 bytes long.
  EXPECT_EQ(911u, data.size());
  EXPECT_EQ("image/png", mime_type);
  EXPECT_TRUE(charset.empty());

  ASSERT_TRUE(headers);
  std::string value;
  EXPECT_EQ(headers->GetStatusLine(), "HTTP/1.1 200 OK");
  EXPECT_TRUE(headers->GetNormalizedHeader("Content-Type", &value));
  EXPECT_EQ(value, "image/png");
}
生成方法:
// Builds a data URI of the form "data:image/<mime_subtype>;base64,<payload>",
// where <payload> is the base64 encoding of |image_data|.
std::string MakeDataURIForImage(base::span<const uint8_t> image_data,
                                base::StringPiece mime_subtype) {
  std::string uri("data:image/");
  uri.append(mime_subtype.begin(), mime_subtype.end());
  uri.append(";base64,");
  uri += base::Base64Encode(image_data);
  return uri;
}
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek “源神”启动!「GitHub 热点速览」
· 微软正式发布.NET 10 Preview 1:开启下一代开发框架新篇章
· 我与微信审核的“相爱相杀”看个人小程序副业
· C# 集成 DeepSeek 模型实现 AI 私有化(本地部署与 API 调用教程)
· DeepSeek R1 简明指南:架构、训练、本地部署及硬件要求
2019-03-01 flink部署操作-flink standalone集群安装部署
2019-03-01 Flink的高可用集群环境
2016-03-01 练琴
2016-03-01 Android中的Binder机制的简要理解
2016-03-01 Handler,Thread,Looper之间关系小结