记一次connection refused问题排查过程
背景
项目上线后,稳定运行8个月后,陆陆续续在ELK日志查询平台发现如下报错:
java.lang.Exception: org.apache.http.conn.HttpHostConnectException: Connect to qyapi.weixin.qq.com:443 [qyapi.weixin.qq.com/81.69.54.213, qyapi.weixin.qq.com/81.69.87.29] failed: Connection refused
at com.johnny.common.util.HttpUtil.doGet(HttpUtil.java:56)
at com.johnny.common.util.SendWeChatUtil.getToken(SendWeChatUtil.java:189)
at com.johnny.common.util.SendWeChatUtil.sendWeChat(SendWeChatUtil.java:57)
at com.johnny.autojob.business.dataprovider.business.TargetProvider.execAutoWorkTo(TargetProvider.java:81)
at com.johnny.autojob.business.dataprovider.business.JdbcDataProvider.execAutoWork(JdbcDataProvider.java:178)
at com.johnny.autojob.business.service.DataProviderService.execAutoWork(DataProviderService.java:193)
at com.johnny.autojob.business.xxljob.IviewAutoJobHandler.execute(IviewAutoJobHandler.java:96)
at com.johnny.job.core.thread.JobThread.run(JobThread.java:142)
Caused by: org.apache.http.conn.HttpHostConnectException: Connect to qyapi.weixin.qq.com:443 [qyapi.weixin.qq.com/81.69.54.213, qyapi.weixin.qq.com/81.69.87.29] failed: Connection refused
at org.apache.http.impl.conn.DefaultHttpClientConnectionOperator.connect(DefaultHttpClientConnectionOperator.java:156)
at org.apache.http.impl.conn.PoolingHttpClientConnectionManager.connect(PoolingHttpClientConnectionManager.java:376)
at org.apache.http.impl.execchain.MainClientExec.establishRoute(MainClientExec.java:393)
at org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:236)
at org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:186)
at org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:89)
at org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:110)
at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:185)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:83)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:108)
at com.johnny.common.util.HttpUtil.doGet(HttpUtil.java:46)
... 7 common frames omitted
Caused by: java.net.ConnectException: Connection refused
at java.net.PlainSocketImpl.socketConnect(Native Method)
at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)
at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)
at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
at java.net.Socket.connect(Socket.java:589)
at org.apache.http.conn.ssl.SSLConnectionSocketFactory.connectSocket(SSLConnectionSocketFactory.java:368)
at org.apache.http.impl.conn.DefaultHttpClientConnectionOperator.connect(DefaultHttpClientConnectionOperator.java:142)
... 17 common frames omitted
或(省略同上面完全一致的apache httpClient类的错误stacktrace):
18:40:06.310 [Job-Thread-16876] ERROR c.x.c.a.b.d.business.TargetProvider - execAutoWorkTo target error
java.lang.Exception: org.apache.http.conn.HttpHostConnectException: Connect to qyapi.weixin.qq.com:443 [qyapi.weixin.qq.com/81.69.54.213, qyapi.weixin.qq.com/81.69.87.29] failed: Connection refused
at com.johnny.common.util.HttpUtil.postJson(HttpUtil.java:123)
at com.johnny.common.util.SendWeChatUtil.sendGroup(SendWeChatUtil.java:116)
at com.johnny.autojob.business.dataprovider.business.TargetProvider.execAutoWorkTo(TargetProvider.java:92)
at com.johnny.autojob.business.dataprovider.business.JdbcDataProvider.execAutoWork(JdbcDataProvider.java:178)
at com.johnny.autojob.business.service.DataProviderService.execAutoWork(DataProviderService.java:193)
at com.johnny.autojob.business.xxljob.IviewAutoJobHandler.execute(IviewAutoJobHandler.java:96)
at com.johnny.job.core.thread.JobThread.run(JobThread.java:142)
分析
如上截图,19个小时有324次报错,其实已经是很严重的问题。
因为应用上线有8个月左右,之前一直都是好的,所以第一直觉会是代码问题。但是代码也看不出问题,鉴于应用有1个多月未发布(上一次发布是8月9日),怀疑是HttpUtil工具类的问题,http连接未释放,并像模像样去查看http
看不出问题,重启试试吧。
但是并没有解决问题。
然后网络上各种百度及Google,找到HTTP context的使用,参考此文增加retryHandler逻辑:
@Slf4j
public class HttpUtil {
public static String doGet(String url) throws Exception {
// 创建Httpclient对象
// CloseableHttpClient httpclient = HttpClients.createDefault();
CloseableHttpClient httpclient = HttpClients.custom()
.setRetryHandler(buildRetryHandler())
.build();
// 创建http GET请求
HttpGet httpGet = new HttpGet(url);
CloseableHttpResponse response = null;
String content = "";
try {
// 执行请求
response = httpclient.execute(httpGet);
// 判断返回状态是否为200
if (response.getStatusLine().getStatusCode() == 200) {
//请求体内容
content = EntityUtils.toString(response.getEntity(), "UTF-8");
} else {
log.error("doget error, url:{}, return code:{}", url,
response.getStatusLine().getStatusCode());
}
} catch (Exception e) {
// 出错行
throw new Exception(e);
} finally {
if (response != null) {
response.close();
}
//相当于关闭浏览器
httpclient.close();
}
return content;
}
public static String postJson(String url, String body) throws Exception {
String result = "";
// 创建httpPost远程连接实例
// 去掉毫无意义的重试逻辑:日志记录看下来,两次重试间隔不过5ms,几乎没有成功
for (int i = 0; i < 1; i++) {
HttpPost httpPost = new HttpPost(url);
// 配置请求参数实例
// 设置连接主机服务超时时间
RequestConfig requestConfig = RequestConfig.custom().setConnectTimeout(35000)
// 设置连接请求超时时间
.setConnectionRequestTimeout(35000)
// 设置读取数据连接超时时间
.setSocketTimeout(60000)
.build();
// 为httpPost实例设置配置
httpPost.setConfig(requestConfig);
httpPost.addHeader("content-type", "application/json;charset=utf-8");
httpPost.addHeader("accept", "application/json");
httpPost.setEntity(new StringEntity(body, StandardCharsets.UTF_8));
int code = 0;
// 之前的写法,并且是直接放在try () 语句块里面的
// CloseableHttpClient httpClient = HttpClients.createDefault();
CloseableHttpClient httpClient = HttpClients.custom()
.setRetryHandler(buildRetryHandler())
.build();
try (CloseableHttpResponse httpResponse = httpClient.execute(httpPost)) {
code = httpResponse.getStatusLine().getStatusCode();
// 从响应对象中获取响应内容
HttpEntity entity = httpResponse.getEntity();
result = EntityUtils.toString(entity);
if (200 == code) {
break;
}
} catch (IOException e) {
// 降级最底层log级别(异常catch后又throw导致外层又记录error log)
log.warn("postJson error url:{}, code:{}, error:{}", url, code, e);
// 出错行
throw new Exception(e);
} finally {
httpPost.releaseConnection();
}
}
return result;
}
// https://www.cnblogs.com/xidongyu/articles/5215084.html
private static HttpRequestRetryHandler buildRetryHandler() {
return (exception, executionCount, context) -> {
if (executionCount >= 3) {
// Do not retry if over max retry count
return false;
}
if (exception instanceof ConnectTimeoutException) {
// Connection refused
return false;
}
HttpClientContext clientContext = HttpClientContext.adapt(context);
HttpRequest request = clientContext.getRequest();
// Retry if the request is considered idempotent
return !(request instanceof HttpEntityEnclosingRequest);
};
}
}
发布后的tcp连接状态也看不出问题。
不过,发现经过此次优化后,内存占用大幅度下降:
咨询系统运维(偏业务对接),答复是:看下来这个解析已经是到腾讯云的机器,这个IP也是腾讯云,连接拒绝的话,有可能是出公网后,网络波动或者对方的节点刚好不稳定。还有一种可能,就是是不是网络权限没有完全开通,之前有同事遇到过,是说要网络权限全部放行的说法。
但是我的应用都上线大半年,之前一直是好的。运维说,你申请吧。
申请就申请吧,死马当活马医。
第一次申请网络OA权限:
问题依旧存在。
咨询系统运维(偏出口网络)同事,答复是:企微这两个IP可能有异常,但是我遇到的异常不仅仅是这两个IP导致的。
于是进一步追问,答复是企微域名CDN解析问题。
于是让我重新提OA申请网络权限,然后他给出一个其他同事提的OA申请网络权限单子(居然也是同一天),列举出全部已知的企微此域名解析出来的IP。
这是什么骚操作??域名解析不是网络该负责的事情吗?我第一次提OA申请网络权限,没有帮我把此域名解析到的所有IP增加我的应用的访问权限??
进一步追问,如果企微这个域名后面增加IP咋办??答复:正常我们通过域名是能解析出来的,解析不出来可能是对端的解析没做好之类的。
我:一脸迷惑???
结局
总之,申请全部IP后,没有再报错。但是有其他企微API问题,不过概率极低,24小时内各出现一次下面错误:
NoHttpResponseException: qyapi.weixin.qq.com:443 failed to respond
java.lang.Exception: org.apache.http.NoHttpResponseException: qyapi.weixin.qq.com:443 failed to respond
at com.johnny.common.util.HttpUtil.postJson(HttpUtil.java:123)
at com.johnny.common.util.SendWeChatUtil.sendMsg(SendWeChatUtil.java:181)
at com.johnny.common.util.SendWeChatUtil.sendWeChat(SendWeChatUtil.java:60)
at com.johnny.autojob.business.dataprovider.business.TargetProvider.execAutoWorkTo(TargetProvider.java:81)
at com.johnny.autojob.business.dataprovider.business.JdbcDataProvider.execAutoWork(JdbcDataProvider.java:178)
at com.johnny.autojob.business.service.DataProviderService.execAutoWork(DataProviderService.java:193)
at com.johnny.autojob.business.xxljob.IviewAutoJobHandler.execute(IviewAutoJobHandler.java:96)
at com.johnny.job.core.thread.JobThread.run(JobThread.java:142)
Caused by: org.apache.http.NoHttpResponseException: qyapi.weixin.qq.com:443 failed to respond
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:141)
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:56)
at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:259)
at org.apache.http.impl.DefaultBHttpClientConnection.receiveResponseHeader(DefaultBHttpClientConnection.java:163)
at org.apache.http.impl.conn.CPoolProxy.receiveResponseHeader(CPoolProxy.java:157)
at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:273)
at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:125)
at org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:272)
at org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:186)
at org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:89)
at org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:110)
at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:185)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:83)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:108)
at com.johnny.common.util.HttpUtil.postJson(HttpUtil.java:112)
... 7 common frames omitted
SocketTimeoutException: Read timed out
java.lang.Exception: java.net.SocketTimeoutException: Read timed out
at com.johnny.common.util.HttpUtil.postJson(HttpUtil.java:123)
at com.johnny.common.util.SendWeChatUtil.sendGroup(SendWeChatUtil.java:116)
at com.johnny.autojob.business.dataprovider.business.TargetProvider.execAutoWorkTo(TargetProvider.java:92)
at com.johnny.autojob.business.dataprovider.business.JdbcDataProvider.execAutoWork(JdbcDataProvider.java:178)
at com.johnny.autojob.business.service.DataProviderService.execAutoWork(DataProviderService.java:193)
at com.johnny.autojob.business.xxljob.IviewAutoJobHandler.execute(IviewAutoJobHandler.java:96)
at com.johnny.job.core.thread.JobThread.run(JobThread.java:142)
Caused by: java.net.SocketTimeoutException: Read timed out
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
at java.net.SocketInputStream.read(SocketInputStream.java:170)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at sun.security.ssl.InputRecord.readFully(InputRecord.java:465)
at sun.security.ssl.InputRecord.read(InputRecord.java:503)
at sun.security.ssl.SSLSocketImpl.readRecord(SSLSocketImpl.java:973)
at sun.security.ssl.SSLSocketImpl.readDataRecord(SSLSocketImpl.java:930)
at sun.security.ssl.AppInputStream.read(AppInputStream.java:105)
at org.apache.http.impl.io.SessionInputBufferImpl.streamRead(SessionInputBufferImpl.java:137)
at org.apache.http.impl.io.SessionInputBufferImpl.fillBuffer(SessionInputBufferImpl.java:153)
at org.apache.http.impl.io.SessionInputBufferImpl.readLine(SessionInputBufferImpl.java:280)
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:138)
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:56)
at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:259)
at org.apache.http.impl.DefaultBHttpClientConnection.receiveResponseHeader(DefaultBHttpClientConnection.java:163)
at org.apache.http.impl.conn.CPoolProxy.receiveResponseHeader(CPoolProxy.java:157)
at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:273)
at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:125)
at org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:272)
at org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:186)
at org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:89)
at org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:110)
at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:185)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:83)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:108)
at com.johnny.common.util.HttpUtil.postJson(HttpUtil.java:112)
... 6 common frames omitted