DC.Web.HttpCompress 压缩模块源码分析(二)
不知道大家思考得怎么样了,有没有什么思路?有的话可以一起讨论。
现在说下这个压缩模块的大致思路。
1.首先,在客户端第一次请求页面时将html中<head>或<body>标签内的css和js引用替换成自定义格式,例如
<script src="js/js1/jquery-1.7.1.min.js" type="text/javascript"></script>
//替换为
<script src="/js/js1/js.axd?files=jquery-1.7.1.min.js" type="text/javascript"></script>
<link href="css/main.css" rel="stylesheet" type="text/css" />
//替换为
<link href="/css/css.axd?files=main.css" type="text/css" rel="stylesheet">
即替换为"/Path/js.axd?files=文件1,文件2"。文件1和文件2位于同一路径下。css替换同理。
然后将新引用重新插入html内,再将html返回给客户端。
客户端下载完毕后开始加载,遇到css和js引用后会再向服务器发出请求,服务器交给我们自定义的HttpHandler处理,将js和css文件去空格,再利用Filter设置将输出流进行Gzip或Deflate压缩,然后传给客户端,客户端接收后解压缩就ok了~
2.具体实现
那么在什么阶段替换html最好呢?作者给出的答案是PostReleaseRequestState(在 ASP.NET 已完成所有请求事件处理程序的执行并且请求状态数据已存储时发生)。
PostReleaseRequestState是添加filter的最好时机,其他事件要么太早,要么太晚。此时完整的response content已经创建完毕,页面也已经完整地执行,但从asp.net的角度看,请求仍需要经过几个模块(详见上一篇文章流程图)。我们在这里对content进行filter,并且此时所有的javascript都已经正确地render完毕。
今天主要跟大家分享一下这部分的代码。
我们创建一个类实现IHttpModule接口
// HTTP module skeleton: implements IHttpModule and hooks the
// PostReleaseRequestState event of the ASP.NET pipeline.
public class HttpModule : IHttpModule
{
// Empty implementation: performs no cleanup.
void IHttpModule.Dispose()
{
}
// Subscribes context_PostReleaseRequestState to the application's
// PostReleaseRequestState event when the module is initialised.
void IHttpModule.Init(HttpApplication context)
{
context.PostReleaseRequestState += new EventHandler(context_PostReleaseRequestState);
}
// Event handler; its body is elided in this listing.
void context_PostReleaseRequestState(object sender, EventArgs e)
{
..........
}
}
并且在初始化时注册PostReleaseRequestState事件。
下面是context_PostReleaseRequestState方法代码及个人理解
/// <summary>
/// Handles <c>PostReleaseRequestState</c>: decides whether the current response should
/// be processed, sets the cache-related response headers, and installs a
/// <c>CompressionPageFilter</c> on the response with the negotiated compression type.
/// </summary>
/// <param name="sender">The <c>HttpApplication</c> raising the event.</param>
/// <param name="e">Unused event data.</param>
void context_PostReleaseRequestState(object sender, EventArgs e)
{
    HttpApplication app = (HttpApplication)sender;

    // The absolute request URL is the seed for the ETag value below.
    string cache = app.Request.Url.AbsoluteUri;

    // Microsoft Ajax requests are left untouched.
    if (app.Request["HTTP_X_MICROSOFTAJAX"] != null)
    {
        return;
    }

    // Load the module configuration from the cache, falling back to Web.config.
    // A missing section yields null; null must never be handed to the cache
    // (Cache.Insert rejects null values), so it is only stored when present.
    Configuration settings = (Configuration)app.Context.Cache["DCCompressModuleConfig"];
    if (settings == null)
    {
        settings = (Configuration)ConfigurationManager.GetSection("DCWeb/HttpCompress");
        if (settings != null)
        {
            app.Context.Cache.Insert("DCCompressModuleConfig", settings);
        }
    }

    if (settings != null)
    {
        // Compression disabled in configuration.
        if (settings.CompressionType == CompressionType.None)
        {
            return;
        }

        // Application-relative path of the requested resource, without a leading '/'.
        string realPath = app.Request.Path.Remove(0, app.Request.ApplicationPath.Length);
        if (realPath.StartsWith("/"))
        {
            realPath = realPath.Remove(0, 1);
        }

        bool isIncludedPath = settings.IncludedPaths.Contains(realPath)
            || settings.IncludedPaths.Contains("~/" + realPath);
        bool isIncludedMime = settings.IncludedMimeTypes.Contains(app.Response.ContentType);

        // Neither the path nor the MIME type is configured for processing.
        if (!isIncludedPath && !isIncludedMime)
        {
            return;
        }

        // Explicit exclusions win over inclusions.
        if (settings.ExcludedPaths.Contains(realPath)
            || settings.ExcludedPaths.Contains("~/" + realPath))
        {
            return;
        }

        if (settings.ExcludedMimeTypes.Contains(app.Response.ContentType))
        {
            return;
        }
    }

    // Make cached output vary by Accept-Encoding.
    // NOTE(review): the original author states that enabling this invalidates HTTP
    // headers set earlier in the pipeline, which then have to be set again - confirm.
    app.Context.Response.Cache.VaryByHeaders["Accept-Encoding"] = true;

    // Encodings advertised by the client.
    string acceptedTypes = app.Request.Headers["Accept-Encoding"];

    // Everything except text/html is marked publicly cacheable.
    if (app.Response.ContentType != "text/html")
    {
        app.Context.Response.Cache.SetCacheability(HttpCacheability.Public);
        app.Context.Response.Cache.SetMaxAge(new TimeSpan(7, 0, 0, 0));
        app.Context.Response.Cache.SetExpires(DateTime.Now.AddYears(1));
        try
        {
            // Util.GetMd5Sum is defined elsewhere; it derives the ETag value from the URL.
            app.Context.Response.Cache.SetETag(Util.GetMd5Sum(cache));
        }
        catch (InvalidOperationException)
        {
            // SetETag throws when an ETag was already set; append the header directly instead.
            app.Context.Response.AppendHeader("ETag", Util.GetMd5Sum(cache));
        }
    }

    app.Context.Response.Charset = "utf-8";

    // The client did not advertise any encoding: leave the response untouched.
    if (string.IsNullOrEmpty(acceptedTypes))
    {
        return;
    }

    // Wrap the current response filter. This happens before the IE checks on purpose:
    // a filter whose Compress stays "none" still rewrites the css/js references.
    CompressionPageFilter filter = new CompressionPageFilter(app.Response.Filter);
    filter.App = app;
    app.Response.Filter = filter;

    if (app.Context.Request.Browser.Browser == "IE")
    {
        // No compression below IE6.
        if (app.Context.Request.Browser.MajorVersion < 6)
        {
            return;
        }

        // No compression for IE6 builds whose user agent contains "EV1".
        string userAgent = app.Context.Request.ServerVariables["HTTP_USER_AGENT"];
        if (app.Context.Request.Browser.MajorVersion == 6
            && !string.IsNullOrEmpty(userAgent)
            && userAgent.Contains("EV1"))
        {
            return;
        }
    }

    // Culture-invariant lowering: under e.g. tr-TR, ToLower() turns "GZIP" into "gzıp".
    acceptedTypes = acceptedTypes.ToLowerInvariant();

    // gzip is preferred unless the configuration explicitly asks for deflate.
    // Without any configuration, gzip is the default. ("x-gzip" contains "gzip",
    // so a single test covers both tokens.)
    bool preferDeflate = settings != null && settings.CompressionType == CompressionType.Deflate;
    if ((acceptedTypes.Contains("gzip") || acceptedTypes.Contains("*")) && !preferDeflate)
    {
        filter.Compress = "gzip";
    }
    else if (acceptedTypes.Contains("deflate"))
    {
        filter.Compress = "deflate";
    }

    if (filter.Compress != "none")
    {
        app.Response.AppendHeader("Content-Encoding", filter.Compress);
    }
}
接下来就是重点的Filter包装类CompressionPageFilter了,下面是代码及个人理解
/// <summary>
/// Response filter that buffers the whole response body, rewrites the css and js
/// references of HTML pages to the css.axd / js.axd handler format, and writes the
/// result to the wrapped stream, optionally gzip- or deflate-compressed.
/// </summary>
/// <remarks>
/// Output is produced once, in <see cref="Close"/>. Buffering raw bytes and decoding
/// them in one go avoids corrupting multi-byte UTF-8 characters (or a closing html tag)
/// that straddle two <see cref="Write"/> calls, and guarantees that responses without
/// a closing html tag are still emitted. The body is assumed to be UTF-8 encoded.
/// </remarks>
private class CompressionPageFilter : Stream
{
    // Matches <link ... href=...> tags; the HRef group captures the href value.
    const string _cssPattern = "(?<HTML><link[^>]*href\\s*=\\s*[\\\"\\']?(?<HRef>[^\"'>\\s]*)[\\\"\\']?[^>]*>)";
    // Matches <script ... src=...></script> tags; the SRC group captures the src value.
    const string _jsPattern = "(?<HTML><script[^>]*src\\s*=\\s*[\\\"\\']?(?<SRC>[^\"'>\\s]*)[\\\"\\']?[^>]*></script>)";

    // Built once instead of on every call.
    private static readonly Regex CssRegex = new Regex(_cssPattern, RegexOptions.IgnoreCase);
    private static readonly Regex JsRegex = new Regex(_jsPattern, RegexOptions.IgnoreCase);

    // Stream that receives the final output.
    private readonly Stream _sink;
    // Raw bytes written so far.
    private readonly MemoryStream _buffer = new MemoryStream();
    // Set once Close has run, so the body is never emitted twice.
    private bool _closed;
    private long _position;

    // Current HttpApplication; must be assigned before the response is closed.
    private HttpApplication app;
    public HttpApplication App
    {
        get { return app; }
        set { app = value; }
    }

    // Compression type: "gzip", "deflate", or "none" (default).
    private string compress = "none";
    public string Compress
    {
        get { return compress; }
        set { compress = value; }
    }

    /// <summary>Creates a filter that writes its output to <paramref name="sink"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="sink"/> is null.</exception>
    public CompressionPageFilter(Stream sink)
    {
        if (sink == null)
        {
            throw new ArgumentNullException("sink");
        }
        _sink = sink;
    }

    public override bool CanRead
    {
        get { return true; }
    }

    public override bool CanSeek
    {
        get { return true; }
    }

    public override bool CanWrite
    {
        get { return true; }
    }

    public override long Length
    {
        get { return 0; }
    }

    public override long Position
    {
        get { return _position; }
        set { _position = value; }
    }

    public override void Flush()
    {
        _sink.Flush();
    }

    public override int Read(byte[] buffer, int offset, int count)
    {
        return _sink.Read(buffer, offset, count);
    }

    public override long Seek(long offset, SeekOrigin origin)
    {
        return _sink.Seek(offset, origin);
    }

    public override void SetLength(long value)
    {
        _sink.SetLength(value);
    }

    /// <summary>Buffers the bytes; nothing reaches the sink before <see cref="Close"/>.</summary>
    public override void Write(byte[] buffer, int offset, int count)
    {
        _buffer.Write(buffer, offset, count);
    }

    /// <summary>Emits the buffered response to the sink, then closes the sink.</summary>
    public override void Close()
    {
        if (_closed)
        {
            return;
        }
        _closed = true;
        try
        {
            WriteBufferedResponse();
        }
        finally
        {
            _sink.Close();
        }
    }

    // Rewrites (HTML only) and compresses the buffered body, then writes it to the sink.
    private void WriteBufferedResponse()
    {
        byte[] data = _buffer.ToArray();
        if (data.Length == 0)
        {
            // Nothing was written (e.g. a body-less response): emit nothing at all.
            return;
        }

        // Only complete HTML documents get their references rewritten; any other
        // content is passed through byte-for-byte so it cannot be damaged by decoding.
        string html = Encoding.UTF8.GetString(data);
        if (html.IndexOf("</html>", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            html = ReplaceJS(html);
            html = ReplaceCss(html);
            data = Encoding.UTF8.GetBytes(html);
        }

        // The compression streams are disposed so their trailer is written (an
        // undisposed GZipStream yields a truncated stream); leaveOpen keeps the sink
        // usable until Close() closes it.
        if (compress == "gzip")
        {
            using (GZipStream gzip = new GZipStream(_sink, CompressionMode.Compress, true))
            {
                gzip.Write(data, 0, data.Length);
            }
        }
        else if (compress == "deflate")
        {
            using (DeflateStream deflate = new DeflateStream(_sink, CompressionMode.Compress, true))
            {
                deflate.Write(data, 0, data.Length);
            }
        }
        else
        {
            _sink.Write(data, 0, data.Length);
        }
    }

    /// <summary>
    /// Rewrites stylesheet links. Links to files of this application are removed and
    /// re-inserted right after the opening head tag as one combined
    /// "{path}css.axd?files=a.css,b.css" link per directory; other links are rewritten
    /// in place to "css.axd?files={url}".
    /// </summary>
    public string ReplaceCss(string html)
    {
        // Files grouped by directory; pathOrder keeps first-seen order so the
        // cascade order of the generated links is deterministic.
        Dictionary<string, List<string>> css = new Dictionary<string, List<string>>();
        List<string> pathOrder = new List<string>();
        Uri baseUri = new Uri(app.Request.Url.AbsoluteUri);

        // Without a head tag there is nowhere to re-insert combined links, so local
        // links are then left untouched instead of being silently dropped.
        bool hasHead = FindHeadInsertPosition(html) >= 0;

        foreach (Match match in CssRegex.Matches(html))
        {
            string linkHtml = match.Value;
            string href = match.Groups["HRef"].Value;

            if (href.Length == 0)
            {
                continue;
            }
            // Not a stylesheet reference.
            if (!linkHtml.Contains("stylesheet") && !linkHtml.Contains("text/css"))
            {
                continue;
            }
            // Resolve relative references; skip malformed ones instead of failing the page.
            Uri uri;
            if (!Uri.TryCreate(baseUri, href, out uri))
            {
                continue;
            }

            string file;
            if (uri.Host == baseUri.Host)
            {
                if (IsInApplication(uri))
                {
                    if (!hasHead)
                    {
                        continue;
                    }
                    // Group by directory; the handler concatenates the files.
                    int index = uri.AbsolutePath.LastIndexOf('/');
                    string path = uri.AbsolutePath.Substring(0, index + 1);
                    file = uri.AbsolutePath.Substring(index + 1);
                    AddToGroup(css, pathOrder, path, file + QuerySuffix(href));
                    // Remove the original reference; the combined link replaces it.
                    html = html.Replace(linkHtml, "");
                    continue;
                }
                file = uri.AbsolutePath + uri.Query;
            }
            else
            {
                file = uri.AbsoluteUri;
            }

            // References outside this application are rewritten in place.
            string newLinkHtml = linkHtml.Replace(href, "css.axd?files=" + file);
            html = html.Replace(linkHtml, newLinkHtml);
        }

        if (pathOrder.Count == 0)
        {
            return html;
        }

        StringBuilder link = new StringBuilder();
        link.AppendLine("");
        foreach (string key in pathOrder)
        {
            link.AppendLine(string.Format("<link href='{0}css.axd?files={1}' type='text/css' rel='stylesheet' />", key, string.Join(",", css[key].ToArray())));
        }

        // Insert the combined links right after the opening head tag.
        int insertAt = FindHeadInsertPosition(html);
        if (insertAt >= 0)
        {
            html = html.Insert(insertAt, link.ToString());
        }
        return html;
    }

    /// <summary>
    /// Rewrites script references: scripts in the head are combined per directory,
    /// scripts in the body are rewritten in place (moving body scripts into the head
    /// could change behaviour).
    /// </summary>
    public string ReplaceJS(string html)
    {
        html = RewriteSection(html, "<head", "</head>", true);
        html = RewriteSection(html, "<body", "</body>", false);
        return html;
    }

    /// <summary>
    /// Removes the script references found in the head fragment and appends combined
    /// js.axd references at the end of the fragment (i.e. just before the closing head
    /// tag), which preserves the relative order of the scripts.
    /// </summary>
    public string ReplaceJSInHead(string html)
    {
        List<string> javascript = new List<string>();
        Dictionary<string, List<string>> js = new Dictionary<string, List<string>>();
        List<string> pathOrder = new List<string>();
        Uri baseUri = new Uri(app.Request.Url.AbsoluteUri);

        foreach (Match match in JsRegex.Matches(html))
        {
            string linkHtml = match.Value;
            string src = match.Groups["SRC"].Value;

            Uri uri;
            if (src.Length == 0 || !Uri.TryCreate(baseUri, src, out uri))
            {
                continue;
            }
            // Only .js files and WebResource.axd are handled. (The original test
            // compared the extension, which includes the dot, to "js" and therefore
            // never matched; it now agrees with ReplaceJSInBody.)
            if (!IsCompressibleScript(uri))
            {
                continue;
            }

            if (uri.Host == baseUri.Host)
            {
                if (IsInApplication(uri))
                {
                    int index = uri.AbsolutePath.LastIndexOf('/');
                    string path = uri.AbsolutePath.Substring(0, index + 1);
                    string file = uri.AbsolutePath.Substring(index + 1);
                    AddToGroup(js, pathOrder, path, file + QuerySuffix(src));
                }
                else
                {
                    javascript.Add(uri.AbsolutePath + uri.Query);
                }
            }
            else
            {
                javascript.Add(uri.AbsoluteUri);
            }
            html = html.Replace(linkHtml, "");
        }

        StringBuilder result = new StringBuilder(html);

        // Scripts from outside the application come first.
        if (javascript.Count > 0)
        {
            result.Append(string.Format("<script src='js.axd?files={0}' type='text/javascript'></script>", string.Join(",", javascript.ToArray())));
            result.Append(Environment.NewLine);
        }
        // Then one combined reference per directory, in first-seen order.
        foreach (string key in pathOrder)
        {
            result.Append(string.Format("<script src='{0}js.axd?files={1}' type='text/javascript' ></script>", key, string.Join(",", js[key].ToArray())));
            result.Append(Environment.NewLine);
        }
        return result.ToString();
    }

    /// <summary>
    /// Rewrites each script reference found in the body fragment in place to the
    /// js.axd format, without combining files.
    /// </summary>
    public string ReplaceJSInBody(string html)
    {
        Uri baseUri = new Uri(app.Request.Url.AbsoluteUri);

        foreach (Match match in JsRegex.Matches(html))
        {
            string linkHtml = match.Value;
            string src = match.Groups["SRC"].Value;

            Uri uri;
            if (src.Length == 0 || !Uri.TryCreate(baseUri, src, out uri))
            {
                continue;
            }
            if (!IsCompressibleScript(uri))
            {
                continue;
            }

            string file;
            string path = "";
            if (uri.Host == baseUri.Host)
            {
                if (IsInApplication(uri))
                {
                    int index = uri.AbsolutePath.LastIndexOf('/');
                    path = uri.AbsolutePath.Substring(0, index + 1);
                    file = uri.AbsolutePath.Substring(index + 1) + QuerySuffix(src);
                }
                else
                {
                    file = uri.AbsolutePath + uri.Query;
                }
            }
            else
            {
                file = uri.AbsoluteUri;
            }

            string newLinkHtml = linkHtml.Replace(src, path + "js.axd?files=" + file);
            html = html.Replace(linkHtml, newLinkHtml);
        }
        return html;
    }

    // Applies the head or body script rewriting to the part of html between openTag
    // and the first closeTag after it; returns html unchanged if either is missing.
    private string RewriteSection(string html, string openTag, string closeTag, bool isHead)
    {
        int start = html.IndexOf(openTag, StringComparison.OrdinalIgnoreCase);
        if (start < 0)
        {
            return html;
        }
        int end = html.IndexOf(closeTag, start, StringComparison.OrdinalIgnoreCase);
        if (end < 0)
        {
            return html;
        }
        string section = html.Substring(start, end - start);
        section = isHead ? ReplaceJSInHead(section) : ReplaceJSInBody(section);
        return html.Substring(0, start) + section + html.Substring(end);
    }

    // Index just after the '>' of the opening head tag, or -1 when there is none.
    private static int FindHeadInsertPosition(string html)
    {
        int headStart = html.IndexOf("<head", StringComparison.OrdinalIgnoreCase);
        if (headStart < 0)
        {
            return -1;
        }
        int tagEnd = html.IndexOf('>', headStart);
        return tagEnd < 0 ? -1 : tagEnd + 1;
    }

    // True when the script is a .js file or a WebResource.axd resource.
    private static bool IsCompressibleScript(Uri uri)
    {
        string path = uri.AbsolutePath;
        return path.EndsWith(".js", StringComparison.OrdinalIgnoreCase)
            || path.IndexOf("WebResource.axd", StringComparison.OrdinalIgnoreCase) >= 0;
    }

    // True when the URI path lies under the current application's virtual path.
    private bool IsInApplication(Uri uri)
    {
        return uri.AbsolutePath.StartsWith(app.Context.Request.ApplicationPath, StringComparison.OrdinalIgnoreCase);
    }

    // Returns the "?..." part of a reference exactly as written in the page, or "".
    private static string QuerySuffix(string reference)
    {
        int queryStart = reference.IndexOf('?');
        return queryStart >= 0 ? reference.Substring(queryStart) : "";
    }

    // Adds file to the list kept for path, recording the first-seen order of paths.
    private static void AddToGroup(Dictionary<string, List<string>> groups, List<string> order, string path, string file)
    {
        List<string> files;
        if (!groups.TryGetValue(path, out files))
        {
            files = new List<string>();
            groups.Add(path, files);
            order.Add(path);
        }
        files.Add(file);
    }
}
3.好累啊,希望大家一起讨论!~