记一次XML文件读取优化
背景
最近在接手公司的一个老项目时,发现打开软件界面后MenuItem的子项Items数据加载极其缓慢,点击界面时会出现明显的卡顿。分析后发现,原因是读取XML时没有充分考虑性能问题,导致读取操作效率低下;经过优化后读取速度大幅提升,最终解决了界面卡顿的问题。这篇文章把整个过程记录下来,以便对相关知识有一个更深入的理解。
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Xml;

namespace WpfApplicationDemo
{
    /// <summary>
    /// Deployment information definition
    /// </summary>
    public class DeploymentInfo
    {
        /// <summary>
        /// Deployment name
        /// </summary>
        public string DeploymentName { get; set; }

        /// <summary>
        /// Comment
        /// </summary>
        public string Comment { get; set; }

        /// <summary>
        /// Whether deployment is actived?
        /// </summary>
        public bool IsDeploymentActived { get; set; }
    }

    /// <summary>
    /// Project information definition
    /// </summary>
    public class ProjectInfo
    {
        /// <summary>
        /// Project name
        /// </summary>
        public string ProjectName { get; set; }

        /// <summary>
        /// The first letter of ProjectName (upper-cased), which is used to group project infos.
        /// Returns "Others" when the first character is not a letter in A-Z, and an empty
        /// string when ProjectName is null or empty.
        /// </summary>
        public string FirstProjectNameLetter
        {
            get
            {
                // A one-character name still has a first letter, so only null/empty is excluded.
                if (string.IsNullOrEmpty(ProjectName))
                {
                    return string.Empty;
                }

                // Invariant casing keeps the grouping independent of the current UI culture
                // (e.g. under a Turkish culture "i".ToUpper() is not "I").
                string firstLetter = ProjectName.Substring(0, 1).ToUpperInvariant();
                return Regex.IsMatch(firstLetter, "^[a-zA-Z]") ? firstLetter : "Others";
            }
        }

        /// <summary>
        /// Whether current project is actived?
        /// </summary>
        public bool IsProjectActived { get; set; }

        /// <summary>
        /// Last modification time
        /// </summary>
        public DateTime LastModifyTime { get; set; }

        /// <summary>
        /// Last open time
        /// </summary>
        public DateTime? LastOpenTime { get; set; }

        /// <summary>
        /// Deployment names in current project
        /// </summary>
        public List<DeploymentInfo> Deployments { get; set; }
    }

    public class ECSHelper
    {
        /// <summary>
        /// Get all existing project information.
        /// Every direct sub-directory of the ECS project directory that contains a
        /// ToolControl.config file is treated as one project; the file is loaded with
        /// <see cref="XmlDocument"/> and each /configuration/Ecs/Deployment/Item node
        /// becomes one deployment of that project.
        /// </summary>
        /// <returns>One <see cref="ProjectInfo"/> per project directory; empty when none exists.</returns>
        /// <remarks>
        /// I/O and XML errors are not caught here: they propagate to the caller with their
        /// original exception type and stack trace.
        /// </remarks>
        public static List<ProjectInfo> GetExistingProjectInfo()
        {
            const string configFileName = "ToolControl.config";

            string activedProjectName = GetActivedProjectName();
            string activedDeploymentName = GetActivedDeploymentName();
            string projectFolder = MachineConfig.GetEcsProjectDirectory();

            // Result collection to return
            var projectInfoList = new List<ProjectInfo>();

            var proDirectory = new DirectoryInfo(projectFolder);
            foreach (var pd in proDirectory.GetDirectories("*", SearchOption.TopDirectoryOnly))
            {
                string configPath = Path.Combine(pd.FullName, configFileName);
                if (!File.Exists(configPath))
                {
                    // Not a project directory.
                    continue;
                }

                var projectInfo = new ProjectInfo
                {
                    ProjectName = pd.Name,
                    Deployments = new List<DeploymentInfo>(),
                    IsProjectActived = string.Equals(activedProjectName, pd.Name, StringComparison.Ordinal),
                    LastModifyTime = new FileInfo(configPath).LastWriteTime
                };

                // Loads the whole document into a DOM tree, which is costly for large files.
                var toolCtrlXml = new XmlDocument();
                toolCtrlXml.Load(configPath);

                var deploymentNodes = toolCtrlXml.SelectNodes("/configuration/Ecs/Deployment/Item");
                foreach (XmlElement dn in deploymentNodes)
                {
                    string deploymentName = dn.InnerText;
                    projectInfo.Deployments.Add(new DeploymentInfo
                    {
                        DeploymentName = deploymentName,
                        // A deployment can only be active inside the active project.
                        // Names are identifiers, so they are compared ordinally like the project name.
                        IsDeploymentActived = projectInfo.IsProjectActived
                            && string.Equals(activedDeploymentName, deploymentName, StringComparison.Ordinal)
                    });
                }

                projectInfoList.Add(projectInfo);
            }

            return projectInfoList;
        }
    }
}
上面的代码最终是要读取下面的一个文件夹中每一个子文件夹中的ToolControl.Config文件,具体文件结构如下面截图所示。
图一 待读取的文件夹结构
我们再来看看每个文件夹中的关键文件ToolControl.Config文件的结构
<configuration> <Security> <EncryptType>NoEncryption</EncryptType> <ProtectionSignature>f95381766e04b634689eec775a23633f</ProtectionSignature> <EncryptionSignature>967fe853d0528878e1449f9cf9db1c6b</EncryptionSignature> </Security> <Ecs> <Header> <Description> </Description> </Header> <Deployment> <Item Comment="">Release</Item> <Item Comment="">Simulation</Item> </Deployment> <ToolControl> <Group Name="TestGroup" Comment="20200911"> <Component Name="BPSI30IoPump" Type="Levitronix.Hardware.Pump.BPSI30IoPump,Levitronix.Hardware.dll" Comment=""> <Deployment>Any</Deployment> <Properties> <Property Name="EnableMaxSpeedLimitCheck">true</Property> <Property Name="AOIndex_ControlOutput">-1</Property> <Property Name="AIIndex_SpeedProcessReading">2</Property> <Property Name="DOIndex_Mode">-1</Property> <Property Name="DOIndex_EnableReset">-1</Property> <Property Name="DIIndex_PumpState">-1</Property> <Property Name="DIIndex_PumpError">-1</Property> <Property Name="ReadingSpeedScale">16000</Property> <Property Name="ReadingPressureScale">60</Property> <Property Name="SettingSpeedScale">16000</Property> <Property Name="SettingPressureScale">60</Property> </Properties> <ParameterAccess> <Parameter Name="ChamberAlarmAction" Privilege="Enginner" /> <Parameter Name="SystemAlarmAction" Privilege="Enginner" /> <Parameter Name="Enable" Privilege="Enginner" /> <Parameter Name="AutoStart" Privilege="Enginner" /> <Parameter Name="DefaultProcessRampTime" Privilege="Enginner" /> <Parameter Name="DefaultSpeedRampTime" Privilege="Enginner" /> <Parameter Name="MaxSpeedLimit" Privilege="Enginner" /> <Parameter Name="DefaultControlMode" Privilege="Enginner" /> <Parameter Name="DefaultPressureTarget" Privilege="Enginner" /> <Parameter Name="DefaultSpeedTarget" Privilege="Enginner" /> </ParameterAccess> <VariablePublish> <Variable Name="ReadingProcessData" Publish="False" /> <Variable Name="IsRunning" Publish="False" /> <Variable Name="IsError" Publish="False" /> <Variable 
Name="ComponentFullPath" Publish="False" /> </VariablePublish> </Component> </Group> </ToolControl> <UserInterface /> </Ecs> </configuration>
上面代码的思路是遍历每一个文件夹,并通过XmlDocument去Load其中的ToolControl.Config文件。由于我们的软件中文件夹非常多,而且不同项目的配置不同,有些文件夹下的ToolControl.Config文件非常庞大,因此在循环中使用XmlDocument逐个Load整个文件时,性能会受到很大影响。针对上面的代码我做了如下修改,代码在效率方面得到了巨大的提升。
优化
首先我们来看看我们的优化代码
// Besides System, System.Collections.Generic, System.IO, System.Linq and System.Xml,
// this code needs: System.Collections.Concurrent, System.Threading.Tasks, System.Xml.Linq.

/// <summary>
/// Get all existing project information.
/// Every direct sub-directory of the ECS project directory that contains a
/// ToolControl.config file is treated as one project. The directories are processed
/// in parallel and each config file is streamed with <see cref="XmlReader"/> instead of
/// being loaded as a whole document.
/// </summary>
/// <returns>
/// One <see cref="ProjectInfo"/> per project directory; empty when none exists.
/// The order of the returned items is not guaranteed because they are collected from
/// parallel workers.
/// </returns>
/// <remarks>
/// Exceptions thrown while processing a directory surface as an
/// <see cref="AggregateException"/> from <see cref="Parallel"/>.
/// </remarks>
public static List<ProjectInfo> GetExistingProjectInfoEx()
{
    string activedProjectName = GetActivedProjectName();
    string activedDeploymentName = GetActivedDeploymentName();
    string projectFolder = MachineConfig.GetEcsProjectDirectory();

    var proDirectory = new System.IO.DirectoryInfo(projectFolder);
    var projectDirArray = proDirectory
        .GetDirectories("*", System.IO.SearchOption.TopDirectoryOnly)
        .Where(p => File.Exists(p.FullName + "\\ToolControl.config"))
        .ToArray();

    // Workers add concurrently, so the intermediate collection must be thread-safe.
    var projectBags = new ConcurrentBag<ProjectInfo>();

    Parallel.ForEach(projectDirArray, pd =>
    {
        var projectInfo = new ProjectInfo
        {
            ProjectName = pd.Name,
            Deployments = new List<DeploymentInfo>(),
            IsProjectActived = string.Equals(activedProjectName, pd.Name, StringComparison.Ordinal),
            LastModifyTime = new FileInfo(pd.FullName + "\\ToolControl.config").LastWriteTime
        };

        // Parse the Deployment child nodes one by one
        ParseDeployment(pd, projectInfo, activedDeploymentName);
        projectBags.Add(projectInfo);
    });

    // Hand the collected results back to the caller; previously the bag was filled
    // but an unrelated, always-empty list was returned.
    return new List<ProjectInfo>(projectBags);
}

/// <summary>
/// Streams the ToolControl.config file of <paramref name="pd"/> and appends one
/// <see cref="DeploymentInfo"/> to <paramref name="projectInfo"/> for every Item child
/// of every Deployment element found in the file.
/// </summary>
/// <param name="pd">Project directory that contains ToolControl.config.</param>
/// <param name="projectInfo">Project whose Deployments list receives the parsed items.</param>
/// <param name="activedDeploymentName">Name of the currently active deployment.</param>
private static void ParseDeployment(DirectoryInfo pd, ProjectInfo projectInfo, string activedDeploymentName)
{
    var settings = new XmlReaderSettings
    {
        IgnoreComments = true,
        IgnoreWhitespace = true
    };

    using (var xmlReader = XmlReader.Create(pd.FullName + "\\ToolControl.config", settings))
    {
        while (!xmlReader.EOF)
        {
            if (xmlReader.NodeType == XmlNodeType.Element && xmlReader.Name == "Deployment")
            {
                // ReadFrom consumes the whole element and leaves the reader on the node that
                // follows it, so Read() must NOT be called again here or that node is skipped.
                var deploymentElement = (XElement)XNode.ReadFrom(xmlReader);

                foreach (var item in deploymentElement.Elements("Item"))
                {
                    string deploymentName = item.Value;
                    projectInfo.Deployments.Add(new DeploymentInfo
                    {
                        DeploymentName = deploymentName,
                        // A deployment can only be active inside the active project.
                        // Names are identifiers, so they are compared ordinally like the project name.
                        IsDeploymentActived = projectInfo.IsProjectActived
                            && string.Equals(activedDeploymentName, deploymentName, StringComparison.Ordinal)
                    });
                }
            }
            else
            {
                xmlReader.Read();
            }
        }
    }
}
这段代码主要从两个方面进行优化。第一,这些文件夹彼此独立、互相之间没有依赖关系,所以这里使用Parallel.ForEach来并行读取每一个文件夹,这样能够很大程度上提高整个代码的读取效率。第二,也是很重要的一个方面,就是使用XmlReader来取代XmlDocument:XmlReader是只进的流式读取器,能够边加载边读,不需要像XmlDocument那样一次性把整个文件加载到内存中,因此效率更高。另外关于XmlReader进行读取的操作过程可以参考这篇文章的分析。
注意事项
1 这里使用Parallel.ForEach进行读取的时候,多个线程会同时写入结果,所以应该使用线程安全的ConcurrentBag来收集返回的结果。
2 XmlReader是只读的,比较适合仅仅对文件进行读取的操作;如果需要对文件节点进行修改操作,则仍然需要使用XmlDocument来加载文件。