分析tomcat对http的解析,我们还是要从tomcat接收到请求开始,我们可以通过浏览器直接请求一下,然后打断点,调试一下。
首先说明,以下分析的是NIO类型的socket处理,大致类图如下:
org.apache.tomcat.util.net.NioEndpoint.Acceptor
其run方法
// Acceptor thread body for the NIO endpoint (article excerpt; lines of "。。。。。。" mark code
// that the article left out). Each pass of the loop waits for a free connection slot,
// blocks in accept(), and then hands the accepted channel to setSocketOptions().
public void org.apache.tomcat.util.net.NioEndpoint.Acceptor.run() {
int errorDelay = 0;
// Loop until we receive a shutdown command
while (running) {
。。。。。。
try {
//if we have reached max connections, wait
countUpOrAwaitConnection();
SocketChannel socket = null;
try {
// Accept the next incoming connection from the server
// socket
// Blocks here waiting for the next connection
socket = serverSock.accept();
} catch (IOException ioe) {
。。。。。。
}
// Successful accept, reset the error delay
errorDelay = 0;
// Configure the socket
if (running && !paused) {
// setSocketOptions() will hand the socket off to
// an appropriate processor if successful
// (*1*) a false return means the hand-off failed, so the socket is closed here
if (!setSocketOptions(socket)) {
closeSocket(socket);
}
} else {
// Endpoint is no longer running, or is paused: drop the accepted connection
closeSocket(socket);
}
} catch (Throwable t) {
ExceptionUtils.handleThrowable(t);
log.error(sm.getString("endpoint.accept.fail"), t);
}
}
state = AcceptorState.ENDED;
}
//(*1*)
/**
 * Prepares a freshly accepted channel and registers it with a poller.
 *
 * The channel is switched to non-blocking mode, the configured socket
 * properties are applied, and the channel is wrapped in a NioChannel
 * (taken from the nioChannels pool when one is available, otherwise newly
 * created) before being registered.
 *
 * @param socket the accepted channel
 * @return true when the channel was registered; false when any step threw,
 *         which tells the caller to close the socket
 */
protected boolean org.apache.tomcat.util.net.NioEndpoint.setSocketOptions(SocketChannel socket) {
    try {
        // Non-blocking mode: the channel is going to be polled
        socket.configureBlocking(false);
        // Apply the configured options to the underlying socket
        socketProperties.setProperties(socket.socket());

        NioChannel nioChannel = nioChannels.pop();
        if (nioChannel != null) {
            // Pooled wrapper: point it at the new channel and reset its state
            nioChannel.setIOChannel(socket);
            nioChannel.reset();
        } else {
            // Nothing pooled: build the buffer handler and a new wrapper
            SocketBufferHandler buffers = new SocketBufferHandler(
                    socketProperties.getAppReadBufSize(),
                    socketProperties.getAppWriteBufSize(),
                    socketProperties.getDirectBuffer());
            nioChannel = isSSLEnabled()
                    ? new SecureNioChannel(socket, buffers, selectorPool, this)
                    : new NioChannel(socket, buffers);
        }

        // Hand the wrapped channel to a poller
        getPoller0().register(nioChannel);
        return true;
    } catch (Throwable failure) {
        ExceptionUtils.handleThrowable(failure);
        try {
            log.error("",failure);
        } catch (Throwable loggingFailure) {
            ExceptionUtils.handleThrowable(loggingFailure);
        }
        // Tell the caller to close the socket
        return false;
    }
}
新的请求被注册到了poller中,我们来看看poller的run方法
/**
 * Poller main loop.
 *
 * Each iteration drains the pending event queue via events(), performs a
 * select, dispatches every ready key to processKey(), and finally runs
 * timeout processing. When the close flag is set the loop processes the
 * remaining events, closes the selector and exits; the stop latch is
 * counted down on the way out.
 */
public void org.apache.tomcat.util.net.NioEndpoint.Poller.run() {
    // Loop until destroy() is called
    while (true) {
        boolean hasEvents = false;
        try {
            if (!close) {
                // Run queued poller events; true if there were any
                hasEvents = events();
                if (wakeupCounter.getAndSet(-1) > 0) {
                    // if we are here, means we have other stuff to do
                    // do a non blocking select
                    keyCount = selector.selectNow();
                } else {
                    // Nothing pending: block for up to selectorTimeout
                    keyCount = selector.select(selectorTimeout);
                }
                wakeupCounter.set(0);
            }
            if (close) {
                // Shutting down: process what is queued, then close the selector
                events();
                timeout(0, false);
                try {
                    selector.close();
                } catch (IOException ioe) {
                    log.error(sm.getString("endpoint.nio.selectorCloseFail"), ioe);
                }
                break;
            }
        } catch (Throwable x) {
            ExceptionUtils.handleThrowable(x);
            log.error("",x);
            continue;
        }
        // either we timed out or we woke up, process events first.
        // Non-short-circuit '|' is deliberate: events() must run even when
        // hasEvents is already true.
        if (keyCount == 0) {
            hasEvents = (hasEvents | events());
        }
        Iterator<SelectionKey> iterator =
                keyCount > 0 ? selector.selectedKeys().iterator() : null;
        // Walk through the collection of ready keys and dispatch
        // any active event.
        while (iterator != null && iterator.hasNext()) {
            SelectionKey sk = iterator.next();
            NioSocketWrapper attachment = (NioSocketWrapper) sk.attachment();
            // The key is removed from the selected set in every case
            iterator.remove();
            // Attachment may be null if another thread has called
            // cancelledKey()
            if (attachment != null) {
                processKey(sk, attachment);
            }
        }
        // process timeouts
        timeout(keyCount, hasEvents);
    }
    getStopLatch().countDown();
}
处理key
protected void org.apache.tomcat.util.net.NioEndpoint.Poller.processKey(SelectionKey sk, NioSocketWrapper attachment) {
try {
if ( close ) {
cancelledKey(sk);
} else if ( sk.isValid() && attachment != null ) {
if (sk.isReadable() || sk.isWritable() ) {
if ( attachment.getSendfileData() != null ) {
processSendfile(sk,attachment, false);
} else {
unreg(sk, attachment, sk.readyOps());
boolean closeSocket = false;
// Read goes before write
if (sk.isReadable()) {
//(*1*)
if (!processSocket(attachment, SocketEvent.OPEN_READ, true)) {
closeSocket = true;
}
}
。。。。。。
我们还是直接跳到处理http请求协议的方法吧
// Entry point for processing HTTP/1.1 requests on a socket (article excerpt; lines of
// "。。。。。。" mark code the article left out, so the visible control flow is incomplete).
// Visible steps: bind the socket to the input/output buffers, parse the request line,
// parse the headers, run prepareRequest(), decide on keep-alive, and invoke the adapter.
public SocketState org.apache.coyote.http11.Http11Processor.service(SocketWrapperBase<?> socketWrapper)
throws IOException {
RequestInfo rp = request.getRequestProcessor();
rp.setStage(org.apache.coyote.Constants.STAGE_PARSE);
// Setting up the I/O
// Associate this processor with the socket wrapper
setSocketWrapper(socketWrapper);
// Initialise the input buffer for this socket
inputBuffer.init(socketWrapper);
// Initialise the output buffer for this socket
outputBuffer.init(socketWrapper);
// Flags
keepAlive = true;
openSocket = false;
readComplete = true;
boolean keptAlive = false;
SendfileState sendfileState = SendfileState.DONE;
while (!getErrorState().isError() && keepAlive && !isAsync() && upgradeToken == null &&
sendfileState == SendfileState.DONE && !endpoint.isPaused()) {
// Parsing the request header
try {
// Parse the request line (method, target, protocol)
if (!inputBuffer.parseRequestLine(keptAlive)) {
// Phase -1 is what parseRequestLine() sets when the HTTP/2 preface matched
if (inputBuffer.getParsingRequestLinePhase() == -1) {
return SocketState.UPGRADING;
// Otherwise the request line is incomplete; stop looping if the helper says so
} else if (handleIncompleteRequestLineRead()) {
break;
}
}
。。。。。。
}
// Endpoint is paused: answer with 503
if (endpoint.isPaused()) {
// 503 - Service unavailable
response.setStatus(503);
setErrorState(ErrorState.CLOSE_CLEAN, null);
} else {
keptAlive = true;
// Set this every time in case limit has been changed via JMX
// Cap the number of headers at the endpoint's configured maximum
request.getMimeHeaders().setLimit(endpoint.getMaxHeaderCount());
// Parse the request headers
if (!inputBuffer.parseHeaders()) {
// We've read part of the request, don't recycle it
// instead associate it with the socket
// Headers are incomplete (more data has to arrive): keep the socket open
openSocket = true;
// Mark the read as not complete
readComplete = false;
// Leave the loop and wait for the next read event on the channel
break;
}
。。。。。。
}
}
。。。。。。
if (!getErrorState().isError()) {
// Setting up filters, and parse some request headers
// Record that the request is now in the prepare stage
rp.setStage(org.apache.coyote.Constants.STAGE_PREPARE);
try {
// Post-process the parsed request line and headers
prepareRequest();
} catch (Throwable t) {
ExceptionUtils.handleThrowable(t);
if (log.isDebugEnabled()) {
log.debug(sm.getString("http11processor.request.prepare"), t);
}
// 500 - Internal Server Error
response.setStatus(500);
setErrorState(ErrorState.CLOSE_CLEAN, t);
getAdapter().log(request, response, 0);
}
}
// Decide whether the connection may be kept alive after this request
if (maxKeepAliveRequests == 1) {
keepAlive = false;
} else if (maxKeepAliveRequests > 0 &&
socketWrapper.decrementKeepAlive() <= 0) {
keepAlive = false;
}
// Process the request in the adapter
if (!getErrorState().isError()) {
try {
// Switch to the service stage
rp.setStage(org.apache.coyote.Constants.STAGE_SERVICE);
// Invoke the container through the adapter
getAdapter().service(request, response);
// NOTE(review): the catch body is empty in this excerpt - presumably elided by the article; confirm against the real source
} catch (Throwable t) {
}
}
。。。。。。
}
解析请求协议
/**
 * Parses the HTTP request line (method, request target, protocol) from
 * byteBuffer. Parsing is resumable: progress is kept in
 * parsingRequestLinePhase so that a call which runs out of data can return
 * false and a later call continues where it stopped.
 *
 * Phases:
 *   0/1 - skip leading blank lines (1 means a read is pending)
 *   2   - method
 *   3   - whitespace after the method
 *   4   - request URI and optional query string
 *   5   - whitespace after the target
 *   6   - protocol, up to the end of the line
 *   -1  - the HTTP/2 client preface was detected
 *
 * @param keptAlive true if a request has already been processed on this
 *                  connection; selects the keep-alive timeout for the first
 *                  read and disables the HTTP/2 preface check
 * @return true when the request line has been parsed completely (or was
 *         already parsed); false when more data is needed or the HTTP/2
 *         preface matched
 * @throws IOException propagated from fill()
 * @throws IllegalArgumentException on an invalid method, request target or
 *         protocol character
 * @throws IllegalStateException if the phase is not one handled here
 */
boolean org.apache.coyote.http11.Http11InputBuffer.parseRequestLine(boolean keptAlive) throws IOException {
    // check state
    if (!parsingRequestLine) {
        return true;
    }
    //
    // Skipping blank lines
    //
    if (parsingRequestLinePhase < 2) {
        byte chr = 0;
        do {
            // Read new bytes if needed
            if (byteBuffer.position() >= byteBuffer.limit()) {
                if (keptAlive) {
                    // Haven't read any request data yet so use the keep-alive
                    // timeout.
                    wrapper.setReadTimeout(wrapper.getEndpoint().getKeepAliveTimeout());
                }
                if (!fill(false)) {
                    // A read is pending, so no longer in initial state.
                    // (fill() did NOT deliver data; phase 1 records that.)
                    parsingRequestLinePhase = 1;
                    return false;
                }
                // At least one byte of the request has been received.
                // Switch to the socket timeout.
                wrapper.setReadTimeout(wrapper.getEndpoint().getConnectionTimeout());
            }
            // HTTP/2 preface check. The whole preface start must be in the
            // buffer before comparing: the loop below does absolute get(i)
            // for every i < CLIENT_PREFACE_START.length, and ByteBuffer.get(int)
            // throws IndexOutOfBoundsException for an index at or past limit().
            if (!keptAlive && byteBuffer.position() == 0 && byteBuffer.limit() >= CLIENT_PREFACE_START.length) {
                boolean prefaceMatch = true;
                for (int i = 0; i < CLIENT_PREFACE_START.length && prefaceMatch; i++) {
                    if (CLIENT_PREFACE_START[i] != byteBuffer.get(i)) {
                        prefaceMatch = false;
                    }
                }
                if (prefaceMatch) {
                    // HTTP/2 preface matched
                    parsingRequestLinePhase = -1;
                    return false;
                }
            }
            // Set the start time once we start reading data (even if it is
            // just skipping blank lines)
            if (request.getStartTime() < 0) {
                request.setStartTime(System.currentTimeMillis());
            }
            // Consume one byte; CR and LF are skipped as blank lines
            chr = byteBuffer.get();
        } while ((chr == Constants.CR) || (chr == Constants.LF));
        // The last get() consumed the first real byte; step back onto it
        byteBuffer.position(byteBuffer.position() - 1);
        // Start of the method token
        parsingRequestLineStart = byteBuffer.position();
        parsingRequestLinePhase = 2;
        if (log.isDebugEnabled()) {
            log.debug("Received ["
                    + new String(byteBuffer.array(), byteBuffer.position(), byteBuffer.remaining(), StandardCharsets.ISO_8859_1) + "]");
        }
    }
    if (parsingRequestLinePhase == 2) {
        //
        // Reading the method name
        // Method name is a token
        //
        boolean space = false;
        while (!space) {
            // Read new bytes if needed
            if (byteBuffer.position() >= byteBuffer.limit()) {
                if (!fill(false)) { // request line parsing
                    return false;
                }
            }
            // Spec says method name is a token followed by a single SP but
            // also be tolerant of multiple SP and/or HT.
            int pos = byteBuffer.position();
            byte chr = byteBuffer.get();
            if (chr == Constants.SP || chr == Constants.HT) {
                space = true;
                // The method is the byte range from the recorded start up to
                // (not including) this whitespace
                request.method().setBytes(byteBuffer.array(), parsingRequestLineStart,
                        pos - parsingRequestLineStart);
            } else if (!HttpParser.isToken(chr)) {
                // Non-token byte inside the method: un-read it and reject
                byteBuffer.position(byteBuffer.position() - 1);
                throw new IllegalArgumentException(sm.getString("iib.invalidmethod"));
            }
        }
        parsingRequestLinePhase = 3;
    }
    if (parsingRequestLinePhase == 3) {
        // Spec says single SP but also be tolerant of multiple SP and/or HT
        boolean space = true;
        while (space) {
            // Read new bytes if needed
            if (byteBuffer.position() >= byteBuffer.limit()) {
                if (!fill(false)) { // request line parsing
                    return false;
                }
            }
            byte chr = byteBuffer.get();
            if (!(chr == Constants.SP || chr == Constants.HT)) {
                space = false;
                // Un-read the first non-whitespace byte
                byteBuffer.position(byteBuffer.position() - 1);
            }
        }
        // Start of the request target
        parsingRequestLineStart = byteBuffer.position();
        parsingRequestLinePhase = 4;
    }
    if (parsingRequestLinePhase == 4) {
        // Mark the current buffer position
        int end = 0;
        //
        // Reading the URI
        //
        boolean space = false;
        while (!space) {
            // Read new bytes if needed
            if (byteBuffer.position() >= byteBuffer.limit()) {
                if (!fill(false)) { // request line parsing
                    return false;
                }
            }
            int pos = byteBuffer.position();
            byte chr = byteBuffer.get();
            if (chr == Constants.SP || chr == Constants.HT) {
                // Whitespace ends the request target
                space = true;
                end = pos;
            } else if (chr == Constants.CR || chr == Constants.LF) {
                // HTTP/0.9 style request
                parsingRequestLineEol = true;
                space = true;
                end = pos;
            } else if (chr == Constants.QUESTION && parsingRequestLineQPos == -1) {
                // First '?': remember where the query string begins
                parsingRequestLineQPos = pos;
            } else if (parsingRequestLineQPos != -1 && !httpParser.isQueryRelaxed(chr)) {
                // Already inside the query string and this byte is not
                // permitted there.
                // %nn decoding will be checked at the point of decoding
                throw new IllegalArgumentException(sm.getString("iib.invalidRequestTarget"));
            } else if (httpParser.isNotRequestTargetRelaxed(chr)) {
                // This is a general check that aims to catch problems early
                // Detailed checking of each part of the request target will
                // happen in Http11Processor#prepareRequest()
                throw new IllegalArgumentException(sm.getString("iib.invalidRequestTarget"));
            }
        }
        if (parsingRequestLineQPos >= 0) {
            // Query string: everything after '?' up to the end of the target
            request.queryString().setBytes(byteBuffer.array(), parsingRequestLineQPos + 1,
                    end - parsingRequestLineQPos - 1);
            // URI: from the start of the target up to '?'
            request.requestURI().setBytes(byteBuffer.array(), parsingRequestLineStart,
                    parsingRequestLineQPos - parsingRequestLineStart);
        } else {
            // No query string: the whole target is the URI
            request.requestURI().setBytes(byteBuffer.array(), parsingRequestLineStart,
                    end - parsingRequestLineStart);
        }
        parsingRequestLinePhase = 5;
    }
    if (parsingRequestLinePhase == 5) {
        // Spec says single SP but also be tolerant of multiple and/or HT
        boolean space = true;
        while (space) {
            // Read new bytes if needed
            if (byteBuffer.position() >= byteBuffer.limit()) {
                if (!fill(false)) { // request line parsing
                    return false;
                }
            }
            byte chr = byteBuffer.get();
            if (!(chr == Constants.SP || chr == Constants.HT)) {
                space = false;
                byteBuffer.position(byteBuffer.position() - 1);
            }
        }
        // Start of the protocol
        parsingRequestLineStart = byteBuffer.position();
        parsingRequestLinePhase = 6;
        // Mark the current buffer position.
        // Note: this 'end' is not the phase-4 local (out of scope here); it
        // is a member declared outside this method.
        end = 0;
    }
    if (parsingRequestLinePhase == 6) {
        //
        // Reading the protocol
        // Protocol is always "HTTP/" DIGIT "." DIGIT
        //
        while (!parsingRequestLineEol) {
            // Read new bytes if needed
            if (byteBuffer.position() >= byteBuffer.limit()) {
                if (!fill(false)) { // request line parsing
                    return false;
                }
            }
            int pos = byteBuffer.position();
            byte chr = byteBuffer.get();
            if (chr == Constants.CR) {
                // CR: the protocol ends here; LF is expected next
                end = pos;
            } else if (chr == Constants.LF) {
                // LF without a preceding CR also ends the protocol
                if (end == 0) {
                    end = pos;
                }
                parsingRequestLineEol = true;
            } else if (!HttpParser.isHttpProtocol(chr)) {
                throw new IllegalArgumentException(sm.getString("iib.invalidHttpProtocol"));
            }
        }
        if ((end - parsingRequestLineStart) > 0) {
            request.protocol().setBytes(byteBuffer.array(), parsingRequestLineStart,
                    end - parsingRequestLineStart);
        } else {
            // No protocol on the request line: record an empty string
            request.protocol().setString("");
        }
        // Request line done: reset the parsing state
        parsingRequestLine = false;
        parsingRequestLinePhase = 0;
        parsingRequestLineEol = false;
        parsingRequestLineStart = 0;
        return true;
    }
    throw new IllegalStateException(
            "Invalid request line parse phase:" + parsingRequestLinePhase);
}
上面的代码中有这么一个类HttpParser,下面是它的静态块
static {
//ARRAY_SIZE = 128
for (int i = 0; i < ARRAY_SIZE; i++) {
// Control> 0-31, 127
if (i < 32 || i == 127) {
IS_CONTROL[i] = true;
}
// Separator
if ( i == '(' || i == ')' || i == '<' || i == '>' || i == '@' ||
i == ',' || i == ';' || i == ':' || i == '\\' || i == '\"' ||
i == '/' || i == '[' || i == ']' || i == '?' || i == '=' ||
i == '{' || i == '}' || i == ' ' || i == '\t') {
IS_SEPARATOR[i] = true;
}
// Token: Anything 0-127 that is not a control and not a separator
if (!IS_CONTROL[i] && !IS_SEPARATOR[i] && i < 128) {
IS_TOKEN[i] = true;
}
// Hex: 0-9, a-f, A-F
if ((i >= '0' && i <='9') || (i >= 'a' && i <= 'f') || (i >= 'A' && i <= 'F')) {
IS_HEX[i] = true;
}
// Not valid for HTTP protocol
// "HTTP/" DIGIT "." DIGIT
if (i == 'H' || i == 'T' || i == 'P' || i == '/' || i == '.' || (i >= '0' && i <= '9')) {
IS_HTTP_PROTOCOL[i] = true;
}
if (i >= '0' && i <= '9') {
IS_NUMERIC[i] = true;
}
if (i >= 'a' && i <= 'z' || i >= 'A' && i <= 'Z') {
IS_ALPHA[i] = true;
}
if (IS_ALPHA[i] || IS_NUMERIC[i] || i == '-' || i == '.' || i == '_' || i == '~') {
IS_UNRESERVED[i] = true;
}
if (i == '!' || i == '$' || i == '&' || i == '\'' || i == '(' || i == ')' || i == '*' ||
i == '+' || i == ',' || i == ';' || i == '=') {
IS_SUBDELIM[i] = true;
}
// userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
if (IS_UNRESERVED[i] || i == '%' || IS_SUBDELIM[i] || i == ':') {
IS_USERINFO[i] = true;
}
// The characters that are normally not permitted for which the
// restrictions may be relaxed when used in the path and/or query
// string
if (i == '\"' || i == '<' || i == '>' || i == '[' || i == '\\' || i == ']' ||
i == '^' || i == '`' || i == '{' || i == '|' || i == '}') {
IS_RELAXABLE[i] = true;
}
}
String prop = System.getProperty("tomcat.util.http.parser.HttpParser.requestTargetAllow");
if (prop != null) {
for (int i = 0; i < prop.length(); i++) {
char c = prop.charAt(i);
if (c == '{' || c == '}' || c == '|') {
REQUEST_TARGET_ALLOW[c] = true;
} else {
log.warn(sm.getString("http.invalidRequestTargetCharacter",
Character.valueOf(c)));
}
}
}
DEFAULT = new HttpParser(null, null);
}
静态块执行完之后,各个布尔数组以字符编码作为下标,字符属于某个类别时对应位置的值就是true。例如要判断某个字节是否可以出现在http协议版本串中,只需要通过IS_HTTP_PROTOCOL数组来判断;如果连续的一段字节查出来都是true,那么这一段就可以作为协议串(比如HTTP/1.1)。总结一下上面解析请求行的代码所做的事情,假设有以下请求
GET /index.html?name=xxx&password=123456 HTTP/1.1
Host: www.xxxx.com
User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
Accept-Encoding: gzip, deflate, sdch
Accept-Language: zh-CN,zh;q=0.8,en;q=0.6
请求体数据
当触发read事件时,tomcat需要对这个请求的http协议进行解析,首先解析请求行(开始行),分以下阶段解析:
- 第一阶段:循环跳过空白符
- 第二阶段:解析方法,比如这里的GET方法
- 第三阶段:跳过空白符
- 第四阶段:解析uri和查询字段,这里的uri就是/index.html,查询字段就是name=xxx&password=123456
- 第五阶段:跳过空白符
- 第六阶段:解析协议,这里就是HTTP/1.1
如果在读取的过程中,数据不够,无法完整的读取完开始行,那么将socket状态设置为SocketState.LONG,tomcat重新注册读事件,等待下次触发read事件
好了,接下来继续看org.apache.coyote.http11.Http11Processor的service(SocketWrapperBase<?>)方法中解析请求头的方法
boolean org.apache.coyote.http11.Http11InputBuffer.parseHeaders() throws IOException {
if (!parsingHeader) {
throw new IllegalStateException(sm.getString("iib.parseheaders.ise.error"));
}
//设置请求头解析状态为还有更多的请求头需要解析
HeaderParseStatus status = HeaderParseStatus.HAVE_MORE_HEADERS;
do {
//解析请求头
status = parseHeader();
//如果缓冲区读取的请求头已经超过最大限制,抛错,如果剩余容量不够存下socketReadBufferSize个字节,那么也会抛错错误
if (byteBuffer.position() > headerBufferSize || byteBuffer.capacity() - byteBuffer.position() < socketReadBufferSize) {
throw new IllegalArgumentException(sm.getString("iib.requestheadertoolarge.error"));
}
} while (status == HeaderParseStatus.HAVE_MORE_HEADERS);
//表示读取完成
if (status == HeaderParseStatus.DONE) {
parsingHeader = false;
//将最后的写入位置position,设置为结束位置
end = byteBuffer.position();
return true;
} else {
return false;
}
}
解析请求头逻辑
// Parses a single request header (name and value) from byteBuffer.
// The method is a resumable state machine: headerParsePos records how far parsing
// got, so when fill() cannot supply more data the method returns NEED_MORE_DATA and
// a later call continues from the same state.
// Returns DONE when the blank line that ends the header section is read,
// HAVE_MORE_HEADERS after one header has been stored, NEED_MORE_DATA when the buffer
// ran dry, or whatever skipLine() returns when a header line is being skipped.
private HeaderParseStatus org.apache.coyote.http11.Http11InputBuffer.parseHeader() throws IOException {
//
// Check for blank line
//
byte chr = 0;
// HEADER_START: at the beginning of a header line
while (headerParsePos == HeaderParsePosition.HEADER_START) {
// Read new bytes if needed
if (byteBuffer.position() >= byteBuffer.limit()) {
// Try to read more data from the channel
if (!fill(false)) {// parse header
headerParsePos = HeaderParsePosition.HEADER_START;
// Nothing available yet: report that more data is needed
return HeaderParseStatus.NEED_MORE_DATA;
}
}
// Consume one byte
chr = byteBuffer.get();
// CR is skipped; the following LF decides
if (chr == Constants.CR) {
// Skip
// LF at the start of a line: this is the blank line that ends the headers
} else if (chr == Constants.LF) {
return HeaderParseStatus.DONE;
// Any other byte is the first byte of a header name
} else {
// Un-read the byte that get() consumed
byteBuffer.position(byteBuffer.position() - 1);
// Leave the blank-line check
break;
}
}
//
if (headerParsePos == HeaderParsePosition.HEADER_START) {
// Mark the current buffer position
// This is where the header name starts
headerData.start = byteBuffer.position();
// Move on to reading the header name
headerParsePos = HeaderParsePosition.HEADER_NAME;
}
//
// Reading the header name
// Header name is always US-ASCII
//
// HEADER_NAME: read up to the colon
while (headerParsePos == HeaderParsePosition.HEADER_NAME) {
// Read new bytes if needed
if (byteBuffer.position() >= byteBuffer.limit()) {
if (!fill(false)) { // parse header
return HeaderParseStatus.NEED_MORE_DATA;
}
}
int pos = byteBuffer.position();
chr = byteBuffer.get();
// Colon terminates the header name
if (chr == Constants.COLON) {
// Next state: skip whitespace before the value
headerParsePos = HeaderParsePosition.HEADER_VALUE_START;
// Register the header name (bytes from start up to the colon) and keep the
// value object returned for it so the value can be filled in later
headerData.headerValue = headers.addValue(byteBuffer.array(), headerData.start,
pos - headerData.start);
pos = byteBuffer.position();
// Mark the current buffer position
// pos is now the position just after the colon; the value bookkeeping starts there
headerData.start = pos;
headerData.realPos = pos;
headerData.lastSignificantChar = pos;
break;
// A byte for which HttpParser.isToken() is false is not allowed in a header name;
// the rest of the line is skipped
} else if (!HttpParser.isToken(chr)) {
// Non-token characters are illegal in header names
// Parsing continues so the error can be reported in context
headerData.lastSignificantChar = pos;
byteBuffer.position(byteBuffer.position() - 1);
// skipLine() will handle the error
return skipLine();
}
// chr is next byte of header name. Convert to lowercase.
// Upper-case letters are rewritten in place in the buffer
if ((chr >= Constants.A) && (chr <= Constants.Z)) {
byteBuffer.put(pos, (byte) (chr - Constants.LC_OFFSET));
}
}
// Skip the line and ignore the header
// This state is re-checked here so that a skip which ran out of data on an earlier
// call is resumed when the method is entered again
if (headerParsePos == HeaderParsePosition.HEADER_SKIPLINE) {
return skipLine();
}
//
// Reading the header value (which can be spanned over multiple lines)
//
// Value states: HEADER_VALUE_START, HEADER_VALUE and HEADER_MULTI_LINE
while (headerParsePos == HeaderParsePosition.HEADER_VALUE_START ||
headerParsePos == HeaderParsePosition.HEADER_VALUE ||
headerParsePos == HeaderParsePosition.HEADER_MULTI_LINE) {
if (headerParsePos == HeaderParsePosition.HEADER_VALUE_START) {
// Skipping spaces
// Leading SP/HT before the value are consumed without being copied
while (true) {
// Read new bytes if needed
if (byteBuffer.position() >= byteBuffer.limit()) {
if (!fill(false)) {// parse header
// HEADER_VALUE_START
return HeaderParseStatus.NEED_MORE_DATA;
}
}
chr = byteBuffer.get();
// First byte that is not SP/HT: un-read it and switch to reading the value
if (!(chr == Constants.SP || chr == Constants.HT)) {
headerParsePos = HeaderParsePosition.HEADER_VALUE;
byteBuffer.position(byteBuffer.position() - 1);
break;
}
}
}
if (headerParsePos == HeaderParsePosition.HEADER_VALUE) {
// Reading bytes until the end of the line
// Set once LF has been read
boolean eol = false;
while (!eol) {
// Read new bytes if needed
if (byteBuffer.position() >= byteBuffer.limit()) {
if (!fill(false)) {// parse header
// HEADER_VALUE
return HeaderParseStatus.NEED_MORE_DATA;
}
}
chr = byteBuffer.get();
// CR is dropped
if (chr == Constants.CR) {
// Skip
// LF ends the line
} else if (chr == Constants.LF) {
eol = true;
// SP/HT inside or after the value. Only the whitespace between the colon and the
// value was skipped above; whitespace here is copied to realPos, but
// lastSignificantChar is NOT advanced, so a trailing run can be cut off below
} else if (chr == Constants.SP || chr == Constants.HT) {
// Copy the whitespace byte to the compacted position
byteBuffer.put(headerData.realPos, chr);
// realPos is the write position of the compacted value
headerData.realPos++;
} else {
// Ordinary value byte: copy it to the compacted position
byteBuffer.put(headerData.realPos, chr);
headerData.realPos++;
// Remember the position just after the last non-whitespace byte.
// Example "www.b aidu. com \r\n": realPos ends after the trailing space,
// lastSignificantChar ends just after the final 'm'
headerData.lastSignificantChar = headerData.realPos;
}
}
// Ignore whitespaces at the end of the line
// Rewind realPos to just after the last non-whitespace byte. The trailing
// whitespace bytes are still in byteBuffer; they are simply outside the value range
headerData.realPos = headerData.lastSignificantChar;
// Checking the first character of the new line. If the character
// is a LWS, then it's a multiline header
// Next state: look at the start of the following line
headerParsePos = HeaderParsePosition.HEADER_MULTI_LINE;
}
// Read new bytes if needed
// If nothing is buffered and fill() supplies nothing, return and wait for the next read event
if (byteBuffer.position() >= byteBuffer.limit()) {
if (!fill(false)) {// parse header
// HEADER_MULTI_LINE
return HeaderParseStatus.NEED_MORE_DATA;
}
}
// Absolute get: peeks at the next byte without consuming it
chr = byteBuffer.get(byteBuffer.position());
if (headerParsePos == HeaderParsePosition.HEADER_MULTI_LINE) {
// Next line does not start with SP/HT: this header is finished, so go back to
// HEADER_START for the next one
if ((chr != Constants.SP) && (chr != Constants.HT)) {
headerParsePos = HeaderParsePosition.HEADER_START;
break;
} else {
// Copying one extra space in the buffer (since there must
// be at least one space inserted between the lines)
// Continuation line: keep one whitespace byte and continue reading the value
byteBuffer.put(headerData.realPos, chr);
headerData.realPos++;
headerParsePos = HeaderParsePosition.HEADER_VALUE_START;
}
}
}
// Set the header value
// The value runs from headerData.start (the position just after the colon) up to
// lastSignificantChar (just after the last non-whitespace byte)
headerData.headerValue.setBytes(byteBuffer.array(), headerData.start,
headerData.lastSignificantChar - headerData.start);
// Reset the per-header bookkeeping for the next header
headerData.recycle();
return HeaderParseStatus.HAVE_MORE_HEADERS;
}
解析请求头,分为这么几个解析状态:
- HeaderParseStatus.DONE:表示解析完成
- HeaderParseStatus.HAVE_MORE_HEADERS:表示还有请求头需要继续解析
- HeaderParseStatus.NEED_MORE_DATA:表示数据不够,需要继续监听read事件获取数据
解析位置有这么几个状态:
- HeaderParsePosition.HEADER_START:表示某行请求头的开始,如果是请求头结束的那一行,那么这个阶段就会读取到回车换行符,解析状态修改为HeaderParseStatus.DONE
- HeaderParsePosition.HEADER_NAME:这个阶段表示读取请求头的名字,比如请求头名字host
- HeaderParsePosition.HEADER_VALUE_START:这个阶段是读取完请求头名之后的一个阶段,这个阶段主要用于跳过冒号和请求头值之间的空白符
- HeaderParsePosition.HEADER_VALUE:这个阶段开始读取请求头值,会去除右边的空白符
- HeaderParsePosition.HEADER_MULTI_LINE:这个阶段表示已经读完请求头值的一行,此时查看下一行的第一个字符:如果不是空格或制表符,说明当前请求头已经结束,break并重新进入HEADER_START阶段;如果是空格或制表符,说明这是折行(多行)的请求头,保留一个空白符后回到HEADER_VALUE_START阶段继续读取值
- HeaderParsePosition.HEADER_SKIPLINE:跳过某行,一般出现非法字符的时候,这行会被跳过
读取完请求头后,设置请求头的结束位置,这个位置又是请求体的开始位置
当tomcat解析完http协议后,需要对解析好的数据做进一步的处理
/**
 * Post-processes the parsed request line and headers before the request is
 * handed to the adapter.
 *
 * Steps, in order: set the scheme for SSL endpoints; classify the protocol
 * (HTTP/1.1, HTTP/1.0, HTTP/0.9, otherwise 505); apply the Connection
 * header to keepAlive; handle Expect (HTTP/1.1 only); apply
 * restrictedUserAgents; validate the Host header; split an absolute-URI
 * request target into host and path; validate the URI characters; install
 * input filters for Transfer-Encoding / Content-Length; parse host and port.
 *
 * Problems are reported by setting a response status plus an error state
 * (CLOSE_CLEAN) rather than by throwing; if an error state is set at the
 * end, the request is logged through the adapter.
 */
private void org.apache.coyote.http11.Http11Processor.prepareRequest() {
    http11 = true;
    http09 = false;
    contentDelimitation = false;

    // SSL-enabled endpoint: the request scheme is https
    if (endpoint.isSSLEnabled()) {
        request.scheme().setString("https");
    }

    // Protocol as parsed from the request line
    MessageBytes protocolMB = request.protocol();
    if (protocolMB.equals(Constants.HTTP_11)) {
        http11 = true;
        protocolMB.setString(Constants.HTTP_11);
    } else if (protocolMB.equals(Constants.HTTP_10)) {
        // HTTP/1.0: keep-alive is off unless the Connection header turns it on below
        http11 = false;
        keepAlive = false;
        protocolMB.setString(Constants.HTTP_10);
    } else if (protocolMB.equals("")) {
        // HTTP/0.9
        http09 = true;
        http11 = false;
        keepAlive = false;
    } else {
        // Unsupported protocol
        http11 = false;
        // Send 505; Unsupported HTTP version
        response.setStatus(505);
        setErrorState(ErrorState.CLOSE_CLEAN, null);
        if (log.isDebugEnabled()) {
            log.debug(sm.getString("http11processor.request.prepare")+
                    " Unsupported HTTP version \""+protocolMB+"\"");
        }
    }

    MimeHeaders headers = request.getMimeHeaders();

    // Check connection header
    MessageBytes connectionValueMB = headers.getValue(Constants.CONNECTION);
    if (connectionValueMB != null) {
        ByteChunk connectionValueBC = connectionValueMB.getByteChunk();
        // "close" in the value disables keep-alive; otherwise "keep-alive" enables it
        if (findBytes(connectionValueBC, Constants.CLOSE_BYTES) != -1) {
            keepAlive = false;
        } else if (findBytes(connectionValueBC,
                Constants.KEEPALIVE_BYTES) != -1) {
            keepAlive = true;
        }
    }

    if (http11) {
        // Expect header: "100-continue" is recorded as an expectation on the
        // request and input swallowing is switched off; any other value is
        // answered with 417 (Expectation Failed)
        MessageBytes expectMB = headers.getValue("expect");
        if (expectMB != null) {
            if (expectMB.indexOfIgnoreCase("100-continue", 0) != -1) {
                inputBuffer.setSwallowInput(false);
                request.setExpectation(true);
            } else {
                response.setStatus(HttpServletResponse.SC_EXPECTATION_FAILED);
                setErrorState(ErrorState.CLOSE_CLEAN, null);
            }
        }
    }

    // Check user-agent header
    if (restrictedUserAgents != null && (http11 || keepAlive)) {
        MessageBytes userAgentValueMB = headers.getValue("user-agent");
        // Check in the restricted list, and adjust the http11
        // and keepAlive flags accordingly
        if(userAgentValueMB != null) {
            String userAgentValue = userAgentValueMB.toString();
            if (restrictedUserAgents != null &&
                    restrictedUserAgents.matcher(userAgentValue).matches()) {
                http11 = false;
                keepAlive = false;
            }
        }
    }

    // Check host header
    MessageBytes hostValueMB = null;
    try {
        hostValueMB = headers.getUniqueValue("host");
    } catch (IllegalArgumentException iae) {
        // Multiple Host headers are not permitted
        // 400 - Bad request
        response.setStatus(400);
        setErrorState(ErrorState.CLOSE_CLEAN, null);
        if (log.isDebugEnabled()) {
            log.debug(sm.getString("http11processor.request.multipleHosts"));
        }
    }
    // An HTTP/1.1 request without a Host header is rejected
    if (http11 && hostValueMB == null) {
        // 400 - Bad request
        response.setStatus(400);
        setErrorState(ErrorState.CLOSE_CLEAN, null);
        if (log.isDebugEnabled()) {
            log.debug(sm.getString("http11processor.request.noHostHeader"));
        }
    }

    // Check for an absolute-URI less the query string which has already
    // been removed during the parsing of the request line
    ByteChunk uriBC = request.requestURI().getByteChunk();
    byte[] uriB = uriBC.getBytes();
    if (uriBC.startsWithIgnoreCase("http", 0)) {
        int pos = 4;
        // Check for https
        if (uriBC.startsWithIgnoreCase("s", pos)) {
            pos++;
        }
        // Next 3 characters must be "://"
        if (uriBC.startsWith("://", pos)) {
            pos += 3;
            // Offset of the URI inside the backing byte array
            int uriBCStart = uriBC.getStart();

            // '/' does not appear in the authority so use the first
            // instance to split the authority and the path segments
            int slashPos = uriBC.indexOf('/', pos);
            // '@' in the authority delimits the userinfo
            int atPos = uriBC.indexOf('@', pos);
            if (slashPos > -1 && atPos > slashPos) {
                // First '@' is in the path segments so no userinfo
                atPos = -1;
            }

            // No '/' after the authority (e.g. https://www.baidu.com): the URI becomes "/"
            if (slashPos == -1) {
                slashPos = uriBC.getLength();
                // Set URI as "/". Use 6 as it will always be a '/'.
                // 01234567
                // http://
                // https://
                request.requestURI().setBytes(uriB, uriBCStart + 6, 1);
            } else {
                // Otherwise (e.g. https://www.baidu.com/index) the URI is the path, "/index"
                request.requestURI().setBytes(uriB, uriBCStart + slashPos, uriBC.getLength() - slashPos);
            }

            // Skip any user info
            if (atPos != -1) {
                // Validate the userinfo
                for (; pos < atPos; pos++) {
                    byte c = uriB[uriBCStart + pos];
                    if (!HttpParser.isUserInfo(c)) {
                        // Strictly there needs to be a check for valid %nn
                        // encoding here but skip it since it will never be
                        // decoded because the userinfo is ignored
                        response.setStatus(400);
                        setErrorState(ErrorState.CLOSE_CLEAN, null);
                        if (log.isDebugEnabled()) {
                            log.debug(sm.getString("http11processor.request.invalidUserInfo"));
                        }
                        break;
                    }
                }
                // Skip the '@'
                pos = atPos + 1;
            }

            if (http11) {
                // Missing host header is illegal but handled above
                if (hostValueMB != null) {
                    // Any host in the request line must be consistent with
                    // the Host header
                    if (!hostValueMB.getByteChunk().equals(
                            uriB, uriBCStart + pos, slashPos - pos)) {
                        if (allowHostHeaderMismatch) {
                            // The requirements of RFC 2616 are being
                            // applied. If the host header and the request
                            // line do not agree, the request line takes
                            // precedence
                            hostValueMB = headers.setValue("host");
                            hostValueMB.setBytes(uriB, uriBCStart + pos, slashPos - pos);
                        } else {
                            // The requirements of RFC 7230 are being
                            // applied. If the host header and the request
                            // line do not agree, trigger a 400 response.
                            response.setStatus(400);
                            setErrorState(ErrorState.CLOSE_CLEAN, null);
                            if (log.isDebugEnabled()) {
                                log.debug(sm.getString("http11processor.request.inconsistentHosts"));
                            }
                        }
                    }
                }
            } else {
                // Not HTTP/1.1 - no Host header so generate one since
                // Tomcat internals assume it is set
                hostValueMB = headers.setValue("host");
                hostValueMB.setBytes(uriB, uriBCStart + pos, slashPos - pos);
            }
        } else {
            // Starts with "http"/"https" but is not followed by "://"
            response.setStatus(400);
            setErrorState(ErrorState.CLOSE_CLEAN, null);
            if (log.isDebugEnabled()) {
                log.debug(sm.getString("http11processor.request.invalidScheme"));
            }
        }
    }

    // Validate the characters in the URI. %nn decoding will be checked at
    // the point of decoding.
    for (int i = uriBC.getStart(); i < uriBC.getEnd(); i++) {
        if (!httpParser.isAbsolutePathRelaxed(uriB[i])) {
            response.setStatus(400);
            setErrorState(ErrorState.CLOSE_CLEAN, null);
            if (log.isDebugEnabled()) {
                log.debug(sm.getString("http11processor.request.invalidUri"));
            }
            break;
        }
    }

    // Input filter setup
    InputFilter[] inputFilters = inputBuffer.getFilters();

    // Parse transfer-encoding header
    if (http11) {
        MessageBytes transferEncodingValueMB = headers.getValue("transfer-encoding");
        if (transferEncodingValueMB != null) {
            String transferEncodingValue = transferEncodingValueMB.toString();
            // Parse the comma separated list. "identity" codings are ignored
            int startPos = 0;
            int commaPos = transferEncodingValue.indexOf(',');
            String encodingName = null;
            while (commaPos != -1) {
                // One input filter is requested per listed coding
                encodingName = transferEncodingValue.substring(startPos, commaPos);
                addInputFilter(inputFilters, encodingName);
                startPos = commaPos + 1;
                commaPos = transferEncodingValue.indexOf(',', startPos);
            }
            // Last (or only) coding in the list
            encodingName = transferEncodingValue.substring(startPos);
            addInputFilter(inputFilters, encodingName);
        }
    }

    // Parse content-length header
    long contentLength = request.getContentLengthLong();
    if (contentLength >= 0) {
        if (contentDelimitation) {
            // contentDelimitation being true at this point indicates that
            // chunked encoding is being used but chunked encoding should
            // not be used with a content length. RFC 2616, section 4.4,
            // bullet 3 states Content-Length must be ignored in this case -
            // so remove it.
            headers.removeHeader("content-length");
            request.setContentLength(-1);
        } else {
            inputBuffer.addActiveFilter(inputFilters[Constants.IDENTITY_FILTER]);
            contentDelimitation = true;
        }
    }

    // Validate host name and extract port if present
    parseHost(hostValueMB);

    if (!contentDelimitation) {
        // If there's no content length
        // (broken HTTP/1.0 or HTTP/1.1), assume
        // the client is not broken and didn't send a body
        inputBuffer.addActiveFilter(inputFilters[Constants.VOID_FILTER]);
        contentDelimitation = true;
    }

    if (getErrorState().isError()) {
        getAdapter().log(request, response, 0);
    }
}
以下是时序图:
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?