[13]Windows 内核情景分析 --- 网络通信
典型的基于tcpip协议套接字方式的网络通信模块层次:
应用程序
socket api
WS2_32.dll
socket irp
Afd.sys
tdi irp
Tcpip.sys
回调函数接口
各Ndis中间层过滤驱动
回调函数接口
小端口驱动
中断交互操作
网卡
应用程序调用WS2_32.dll中的socket api,socket api在内部生成socket irp发给afd.sys这个中间辅助驱动层,afd.sys将socket irp转换成tdi irp发给tcpip协议驱动,协议驱动通过注册的回调函数与小端口驱动交互(中间可能穿插N个中间层过滤驱动),小端口驱动最终通过中断与网卡交互,操作硬件。
其中,协议驱动、中间层驱动、小端口驱动三者之间的交互是通过ndis.sys这个库函数模块实现的,或者说ndis.sys提供了ndis框架,协议驱动、中间层驱动、小端口驱动三者都得遵循这个框架。
为什么网络通信需要这么复杂的分层?答案是为了减轻开发维护管理工作的需要,分层能够提供最大的灵活性。各层的设计人员只需专注自身模块的设计工作,无需担心其他模块是怎么实现的,只需保持接口一致即可。
如应用程序可以调用socket api就可以实现网络通信,而不管底层是如何实现的。使用socket api还可以使得windows上能兼容运行Unix系统上的网络通信程序,ws2_32.dll这个模块中实现了socket接口。
Afd.sys实际上是一个适配层,他可以适配N种协议驱动。
Tcpip.sys是一种协议驱动(其实是一个协议栈驱动),它内部实现了一套协议栈,决定了如何解析从网卡接收到的包,以及以什么格式将应用程序数据发到网卡。只不过tcpip.sys将收到的包按链路层、网络层、传输层分为三层逐层解析。事实上我们完全可以自定义、自编写一个协议驱动,按照我们自己的协议来发包、收包(我们的这个自定义协议驱动可以采用分层机制,也可以采用简单的单层机制),这样在发送方电脑和接收方电脑都安装我们的自定义协议驱动后,发送方就可以按照自定义协议发包,接收方就按照约定的格式解包。
如果不考虑中间层驱动,协议驱动是直接与小端口驱动交互的。协议驱动从小端口驱动收包,协议驱动发包给小端口驱动,这就是二者之间的交互。它们之间的交互通过ndis框架约定的一套回调函数接口来实现。
下面我们看各层驱动的实现:
一个协议驱动需要在DriverEntry中将自己注册为一个协议驱动,向ndis框架登记、声明自己的协议特征。
一个协议特征记录了协议的名称以及它提供的各个回调函数
4.0版本的ndis协议特征结构如下定义:
/*
 * NDIS 4.0 protocol characteristics.
 * A protocol driver fills this in (version, name and callback table) and
 * passes it to NdisRegisterProtocol, which copies it into the protocol
 * descriptor.
 */
typedef struct _NDIS40_PROTOCOL_CHARACTERISTICS
{
UCHAR MajorNdisVersion;
UCHAR MinorNdisVersion;
__MINGW_EXTENSION union {
UINT Reserved;
UINT Flags;
};
OPEN_ADAPTER_COMPLETE_HANDLER OpenAdapterCompleteHandler;// bind (open adapter) completion callback
CLOSE_ADAPTER_COMPLETE_HANDLER CloseAdapterCompleteHandler;// unbind (close adapter) completion callback
SEND_COMPLETE_HANDLER SendCompleteHandler;// send completion callback
TRANSFER_DATA_COMPLETE_HANDLER TransferDataCompleteHandler;// transfer-data completion callback
RESET_COMPLETE_HANDLER ResetCompleteHandler;
REQUEST_COMPLETE_HANDLER RequestCompleteHandler;// NDIS request completion callback
RECEIVE_HANDLER ReceiveHandler;// receive callback
RECEIVE_COMPLETE_HANDLER ReceiveCompleteHandler;// receive completion callback
STATUS_HANDLER StatusHandler;// status change notification callback
STATUS_COMPLETE_HANDLER StatusCompleteHandler;// status change completion callback
NDIS_STRING Name;// protocol name
RECEIVE_PACKET_HANDLER ReceivePacketHandler;// packet receive callback
BIND_HANDLER BindAdapterHandler;// bind notification callback
UNBIND_HANDLER UnbindAdapterHandler;// unbind notification callback
PNP_EVENT_HANDLER PnPEventHandler;// PnP event callback
UNLOAD_PROTOCOL_HANDLER UnloadHandler;// unload routine of the protocol driver
} NDIS40_PROTOCOL_CHARACTERISTICS;
下面的函数用于将一个驱动注册为ndis协议驱动
/*
 * NdisRegisterProtocol - register the calling driver as an NDIS protocol driver.
 *
 * Status                  - receives the result of the registration.
 * NdisProtocolHandle      - receives the protocol handle, which is really a
 *                           pointer to the PROTOCOL_BINDING descriptor
 *                           allocated here; NULL when nothing was registered.
 * ProtocolCharacteristics - version-tagged protocol characteristics.
 * CharacteristicsLength   - size in bytes of the caller's structure.
 *
 * The characteristics are copied into a new descriptor, every adapter that
 * already exists is offered to the protocol for binding, a
 * NetEventBindsComplete PnP event is delivered, and on success the
 * descriptor is linked into the global protocol list.
 */
VOID
NdisRegisterProtocol(
    OUT PNDIS_STATUS Status,
    OUT PNDIS_HANDLE NdisProtocolHandle,
    IN PNDIS_PROTOCOL_CHARACTERISTICS ProtocolCharacteristics,
    IN UINT CharacteristicsLength)
{
    PPROTOCOL_BINDING Protocol;
    UINT MinSize;
    PNET_PNP_EVENT PnPEvent;

    *NdisProtocolHandle = NULL;

    switch (ProtocolCharacteristics->MajorNdisVersion)
    {
    case 0x03:
        MinSize = sizeof(NDIS30_PROTOCOL_CHARACTERISTICS);
        break;
    case 0x04:
        MinSize = sizeof(NDIS40_PROTOCOL_CHARACTERISTICS);
        break;
    case 0x05:
        MinSize = sizeof(NDIS50_PROTOCOL_CHARACTERISTICS);
        break;
    default:
        *Status = NDIS_STATUS_BAD_VERSION;
        return;
    }

    /* The structure must be at least as large as the declared version requires. */
    if (CharacteristicsLength < MinSize)
    {
        *Status = NDIS_STATUS_BAD_CHARACTERISTICS;
        return;
    }

    /* The protocol handle is a pointer to this descriptor. */
    Protocol = ExAllocatePool(NonPagedPool, sizeof(PROTOCOL_BINDING));
    if (Protocol == NULL)
    {
        *Status = NDIS_STATUS_RESOURCES;
        return;
    }

    RtlZeroMemory(Protocol, sizeof(PROTOCOL_BINDING));
    RtlCopyMemory(&Protocol->Chars, ProtocolCharacteristics, MinSize); /* record the characteristics */
    KeInitializeSpinLock(&Protocol->Lock);
    InitializeListHead(&Protocol->AdapterListHead); /* no adapters bound yet */

    /* Published before binding because the bind step runs with this descriptor. */
    *NdisProtocolHandle = Protocol;

    /* Bind to every adapter that already exists. */
    ndisBindMiniportsToProtocol(Status, Protocol);

    /* Tell the protocol that the initial round of binding is finished. */
    PnPEvent = ProSetupPnPEvent(NetEventBindsComplete, NULL, 0);
    if (PnPEvent && Protocol->Chars.PnPEventHandler)
    {
        /* The handler's return value was never consumed by the original code. */
        (VOID)(*Protocol->Chars.PnPEventHandler)(NULL, PnPEvent);
    }
    /* NOTE(review): PnPEvent is not released here; confirm who owns the
     * object returned by ProSetupPnPEvent. */

    if (*Status == NDIS_STATUS_SUCCESS)
    {
        /* Link into the global protocol list. */
        ExInterlockedInsertTailList(&ProtocolListHead, &Protocol->ListEntry, &ProtocolListLock);
    }
    else if (Protocol->AdapterListHead.Flink == &Protocol->AdapterListHead)
    {
        /* Failed with no binding referencing the descriptor: do not leak it
         * and do not hand back a handle together with a failure status. */
        ExFreePool(Protocol);
        *NdisProtocolHandle = NULL;
    }
}
上面最主要的工作便是登记协议特征到驱动描述符中,然后顺带绑定所有现有网卡。下面的函数就是用来绑定所有现有网卡的。
/*
 * ndisBindMiniportsToProtocol - offer adapters to a protocol for binding.
 *
 * Status   - receives NDIS_STATUS_SUCCESS, or a failure code when the
 *            registry data cannot be read or memory runs out; the protocol's
 *            BindAdapterHandler may also write to it.
 * Protocol - protocol descriptor whose BindAdapterHandler is invoked.
 *
 * Reads the "Bind" value under <SERVICES_KEY><protocol name><LINKAGE_KEY>.
 * The value is walked as a list of NUL-terminated adapter device names
 * ending with an empty string. For every adapter that is started and not
 * yet bound to this protocol, BindAdapterHandler is called with the device
 * name and a per-adapter registry path.
 */
VOID ndisBindMiniportsToProtocol(OUT PNDIS_STATUS Status, IN PPROTOCOL_BINDING Protocol)
{
    HANDLE DriverKeyHandle = NULL;
    PKEY_VALUE_PARTIAL_INFORMATION KeyInformation = NULL;
    PNDIS_PROTOCOL_CHARACTERISTICS ProtocolCharacteristics = &Protocol->Chars;
    OBJECT_ATTRIBUTES ObjectAttributes;
    UNICODE_STRING RegistryPath;
    UNICODE_STRING ValueName;
    WCHAR *RegistryPathStr;
    WCHAR *DataPtr;
    ULONG ResultLength = 0;
    NTSTATUS NtStatus;

    RegistryPathStr = ExAllocatePoolWithTag(PagedPool, sizeof(SERVICES_KEY) + ProtocolCharacteristics->Name.Length + sizeof(LINKAGE_KEY), NDIS_TAG + __LINE__);
    if (RegistryPathStr == NULL)
    {
        *Status = NDIS_STATUS_RESOURCES;
        return;
    }

    /* Build <SERVICES_KEY><protocol name><LINKAGE_KEY>. */
    wcscpy(RegistryPathStr, SERVICES_KEY);
    wcsncat(RegistryPathStr, ((WCHAR *)ProtocolCharacteristics->Name.Buffer), ProtocolCharacteristics->Name.Length / sizeof(WCHAR));
    RegistryPathStr[wcslen(SERVICES_KEY) + ProtocolCharacteristics->Name.Length / sizeof(WCHAR)] = 0;
    wcscat(RegistryPathStr, LINKAGE_KEY);

    RtlInitUnicodeString(&RegistryPath, RegistryPathStr);
    InitializeObjectAttributes(&ObjectAttributes, &RegistryPath, OBJ_CASE_INSENSITIVE, NULL, NULL);
    NtStatus = ZwOpenKey(&DriverKeyHandle, KEY_READ, &ObjectAttributes); /* open the Linkage key */
    ExFreePool(RegistryPathStr);
    if (!NT_SUCCESS(NtStatus))
    {
        *Status = NDIS_STATUS_FAILURE;
        return;
    }

    /* First query only asks for the size of the Bind value. */
    RtlInitUnicodeString(&ValueName, L"Bind");
    NtStatus = ZwQueryValueKey(DriverKeyHandle, &ValueName, KeyValuePartialInformation, NULL, 0, &ResultLength);
    if (NtStatus != STATUS_BUFFER_OVERFLOW && NtStatus != STATUS_BUFFER_TOO_SMALL && NtStatus != STATUS_SUCCESS)
    {
        /* Any other status means ResultLength cannot be trusted. */
        ZwClose(DriverKeyHandle);
        *Status = NDIS_STATUS_FAILURE;
        return;
    }

    KeyInformation = ExAllocatePoolWithTag(PagedPool, sizeof(KEY_VALUE_PARTIAL_INFORMATION) + ResultLength, NDIS_TAG + __LINE__);
    if (KeyInformation == NULL)
    {
        ZwClose(DriverKeyHandle);
        *Status = NDIS_STATUS_RESOURCES;
        return;
    }

    /* Second query fetches the Bind value itself. */
    NtStatus = ZwQueryValueKey(DriverKeyHandle, &ValueName, KeyValuePartialInformation, KeyInformation, sizeof(KEY_VALUE_PARTIAL_INFORMATION) + ResultLength, &ResultLength);
    ZwClose(DriverKeyHandle);
    if (!NT_SUCCESS(NtStatus))
    {
        ExFreePool(KeyInformation);
        *Status = NDIS_STATUS_FAILURE;
        return;
    }

    *Status = NDIS_STATUS_SUCCESS;

    /* Walk every adapter name in the list. */
    for (DataPtr = (WCHAR *)KeyInformation->Data;
         *DataPtr != 0;
         DataPtr += wcslen(DataPtr) + 1)
    {
        VOID *BindContext = NULL;
        NDIS_STRING DeviceName;
        NDIS_STRING AdapterPath;
        WCHAR *AdapterPathStr;
        ULONG PathLength;
        BIND_HANDLER BindHandler;

        /* DeviceName has the form "\Device\<miniport device name>". */
        RtlInitUnicodeString(&DeviceName, DataPtr);

        if (!MiniLocateDevice(&DeviceName)) /* adapter not started yet */
            continue;
        if (LocateAdapterBindingByName(Protocol, &DeviceName)) /* already bound to this protocol */
            continue;

        /* DataPtr + 8 skips the 8-character "\Device\" prefix. */
        PathLength = sizeof(SERVICES_KEY) +
                     wcslen(DataPtr + 8) * sizeof(WCHAR) +
                     sizeof(PARAMETERS_KEY) +
                     ProtocolCharacteristics->Name.Length + sizeof(WCHAR);

        AdapterPathStr = ExAllocatePool(PagedPool, PathLength);
        if (AdapterPathStr == NULL)
        {
            *Status = NDIS_STATUS_RESOURCES;
            break;
        }

        /* Build <SERVICES_KEY><adapter name><PARAMETERS_KEY><protocol name>. */
        wcscpy(AdapterPathStr, SERVICES_KEY);
        wcscat(AdapterPathStr, DataPtr + 8);
        wcscat(AdapterPathStr, PARAMETERS_KEY);
        wcsncat(AdapterPathStr, ProtocolCharacteristics->Name.Buffer, ProtocolCharacteristics->Name.Length / sizeof(WCHAR));
        AdapterPathStr[PathLength / sizeof(WCHAR) - 1] = 0;
        RtlInitUnicodeString(&AdapterPath, AdapterPathStr);

        /* Ask the protocol to bind to this adapter. */
        BindHandler = ProtocolCharacteristics->BindAdapterHandler;
        if (BindHandler)
            BindHandler(Status, BindContext, &DeviceName, &AdapterPath, 0);

        /* NOTE(review): AdapterPathStr is not freed, as in the original;
         * confirm whether the protocol may keep the pointer after the call. */
    }

    /* Released only after the walk: DataPtr points into this buffer. */
    ExFreePool(KeyInformation);
}
一个驱动注册为协议驱动后,ndis内部会为这个驱动创建一个协议驱动描述符,返回的句柄就是这个结构的指针。由 typedef PVOID NDIS_HANDLE; 可见,ndis句柄其实就是一个指针。
typedef struct _PROTOCOL_BINDING { //协议驱动描述符
LIST_ENTRY ListEntry; 用来挂入全局协议驱动链表
KSPIN_LOCK Lock;
NDIS_PROTOCOL_CHARACTERISTICS Chars; //关键。本协议驱动的特征
WORK_QUEUE_ITEM WorkItem;
LIST_ENTRY AdapterListHead; //本协议驱动绑定的所有网卡
} PROTOCOL_BINDING, *PPROTOCOL_BINDING;
同样:小端口驱动也需要在其DriverEntry中将自己注册为一个ndis小端口驱动。
Struct NDIS40_MINIPORT_CHARACTERISTICS //4.0版的小端口驱动特征结构
{
UCHAR MajorNdisVersion;
UCHAR MinorNdisVersion;
UINT Reserved;
W_CHECK_FOR_HANG_HANDLER CheckForHangHandler;
W_DISABLE_INTERRUPT_HANDLER DisableInterruptHandler;//禁用来自特定网卡的中断
W_ENABLE_INTERRUPT_HANDLER EnableInterruptHandler; //启用来自特定网卡的中断
W_HALT_HANDLER HaltHandler;
W_HANDLE_INTERRUPT_HANDLER HandleInterruptHandler;//isr的后半部
W_INITIALIZE_HANDLER InitializeHandler; //IRP_MN_START_DEVICE中调用的启动初始化函数
W_ISR_HANDLER ISRHandler; //我们的isr
W_QUERY_INFORMATION_HANDLER QueryInformationHandler;//处理查询请求的函数
W_RECONFIGURE_HANDLER ReconfigureHandler;
W_RESET_HANDLER ResetHandler;
W_SEND_HANDLER SendHandler; //发送函数
W_SET_INFORMATION_HANDLER SetInformationHandler;//处理设置请求的函数
W_TRANSFER_DATA_HANDLER TransferDataHandler;//处理协议驱动发下来的转移数据请求的函数
W_RETURN_PACKET_HANDLER ReturnPacketHandler; //归还包函数
W_SEND_PACKETS_HANDLER SendPacketsHandler;//发送包函数
W_ALLOCATE_COMPLETE_HANDLER AllocateCompleteHandler;
}
/*
 * Miniport driver descriptor. Its address serves as the miniport driver
 * (wrapper) handle returned by NdisInitializeWrapper.
 */
typedef struct _NDIS_M_DRIVER_BLOCK // miniport driver descriptor / handle
{
LIST_ENTRY ListEntry; // links this driver into the global miniport driver list
KSPIN_LOCK Lock;
NDIS_MINIPORT_CHARACTERISTICS MiniportCharacteristics; // characteristics copied in at registration
WORK_QUEUE_ITEM WorkItem;
PDRIVER_OBJECT DriverObject; // driver object of the miniport driver
LIST_ENTRY DeviceList; // all adapter devices created by this driver
PUNICODE_STRING RegistryPath; // service key path of this driver
} NDIS_M_DRIVER_BLOCK, *PNDIS_M_DRIVER_BLOCK;
下面的函数用于将一个驱动注册为ndis小端口驱动
/*
 * NdisMRegisterMiniport - register a driver as an NDIS miniport driver.
 *
 * NdisWrapperHandle       - miniport driver handle from NdisInitializeWrapper
 *                           (a pointer to an NDIS_M_DRIVER_BLOCK).
 * MiniportCharacteristics - version-tagged callback table of the miniport.
 * CharacteristicsLength   - size in bytes of the caller's structure.
 *
 * Returns NDIS_STATUS_SUCCESS, NDIS_STATUS_BAD_VERSION for an unknown major
 * version, NDIS_STATUS_BAD_CHARACTERISTICS when the structure is too short
 * or a mandatory handler is missing, NDIS_STATUS_FAILURE for a NULL handle,
 * or NDIS_STATUS_RESOURCES when the driver object extension cannot be
 * allocated.
 */
NDIS_STATUS
NdisMRegisterMiniport(
    IN NDIS_HANDLE NdisWrapperHandle,
    IN PNDIS_MINIPORT_CHARACTERISTICS MiniportCharacteristics,
    IN UINT CharacteristicsLength)
{
    UINT MinSize;
    PNDIS_M_DRIVER_BLOCK Miniport = (PNDIS_M_DRIVER_BLOCK)NdisWrapperHandle;
    PNDIS_M_DRIVER_BLOCK *MiniportPtr;
    NTSTATUS Status;

    /* NdisInitializeWrapper leaves the handle NULL when it fails. */
    if (Miniport == NULL)
        return NDIS_STATUS_FAILURE;

    switch (MiniportCharacteristics->MajorNdisVersion)
    {
    case 0x03:
        MinSize = sizeof(NDIS30_MINIPORT_CHARACTERISTICS);
        break;
    case 0x04:
        MinSize = sizeof(NDIS40_MINIPORT_CHARACTERISTICS);
        break;
    case 0x05:
        MinSize = sizeof(NDIS50_MINIPORT_CHARACTERISTICS);
        break;
    default:
        return NDIS_STATUS_BAD_VERSION;
    }

    if (CharacteristicsLength < MinSize)
        return NDIS_STATUS_BAD_CHARACTERISTICS;

    /* These three callbacks are mandatory in every NDIS version. */
    if ((!MiniportCharacteristics->HaltHandler) ||
        (!MiniportCharacteristics->InitializeHandler) ||
        (!MiniportCharacteristics->ResetHandler))
    {
        return NDIS_STATUS_BAD_CHARACTERISTICS;
    }

    /* Query/Set handlers: mandatory before 5.0; from 5.0 on a CoRequestHandler
     * may stand in for them. */
    if (MiniportCharacteristics->MajorNdisVersion < 0x05)
    {
        if ((!MiniportCharacteristics->QueryInformationHandler) ||
            (!MiniportCharacteristics->SetInformationHandler))
        {
            return NDIS_STATUS_BAD_CHARACTERISTICS;
        }
    }
    else
    {
        if (((!MiniportCharacteristics->QueryInformationHandler) ||
             (!MiniportCharacteristics->SetInformationHandler)) &&
            (!MiniportCharacteristics->CoRequestHandler))
        {
            return NDIS_STATUS_BAD_CHARACTERISTICS;
        }
    }

    /* At least one send entry point allowed by the declared version is required. */
    if (MiniportCharacteristics->MajorNdisVersion == 0x03)
    {
        if (!MiniportCharacteristics->SendHandler)
            return NDIS_STATUS_BAD_CHARACTERISTICS;
    }
    else if (MiniportCharacteristics->MajorNdisVersion == 0x04)
    {
        if ((!MiniportCharacteristics->SendHandler) &&
            (!MiniportCharacteristics->SendPacketsHandler))
        {
            return NDIS_STATUS_BAD_CHARACTERISTICS;
        }
    }
    else if (MiniportCharacteristics->MajorNdisVersion == 0x05)
    {
        if ((!MiniportCharacteristics->SendHandler) &&
            (!MiniportCharacteristics->SendPacketsHandler) &&
            (!MiniportCharacteristics->CoSendPacketsHandler))
        {
            return NDIS_STATUS_BAD_CHARACTERISTICS;
        }
    }

    /* Record the miniport's characteristics in the driver descriptor. */
    RtlCopyMemory(&Miniport->MiniportCharacteristics, MiniportCharacteristics, MinSize);

    /* The driver object extension holds a pointer back to the descriptor;
     * NdisIAddDevice looks it up under the same 'NMID' identifier. */
    Status = IoAllocateDriverObjectExtension(Miniport->DriverObject, (PVOID)'NMID',
                                             sizeof(PNDIS_M_DRIVER_BLOCK), (PVOID *)&MiniportPtr);
    if (!NT_SUCCESS(Status))
        return NDIS_STATUS_RESOURCES; /* MiniportPtr is not valid on failure */
    *MiniportPtr = Miniport;

    /* NDIS takes over these dispatch routines. Routines the driver installed
     * before registering are overwritten here; routines installed afterwards
     * replace (hook) the NDIS ones. The "I" in the names stands for
     * internal, non-exported functions. */
    Miniport->DriverObject->MajorFunction[IRP_MJ_CREATE] = NdisICreateClose;
    Miniport->DriverObject->MajorFunction[IRP_MJ_CLOSE] = NdisICreateClose;
    Miniport->DriverObject->MajorFunction[IRP_MJ_PNP] = NdisIDispatchPnp;
    Miniport->DriverObject->MajorFunction[IRP_MJ_SHUTDOWN] = NdisIShutdown;
    Miniport->DriverObject->MajorFunction[IRP_MJ_DEVICE_CONTROL] = NdisIDeviceIoControl;

    /* NDIS also owns AddDevice: it creates the miniport device object and
     * attaches it on top of the hardware PDO. */
    Miniport->DriverObject->DriverExtension->AddDevice = NdisIAddDevice;

    return NDIS_STATUS_SUCCESS;
}
协议驱动通过NdisRegisterProtocol,小端口驱动通过NdisMRegisterMiniport向ndis框架注册了自己的回调函数后,协议驱动就可以与小端口驱动在ndis框架下通过这两组回调函数进行交互通信了。Ndis.sys起着桥梁中介的作用,除此之外,ndis.sys模块还提供了大量的ndis运行库函数。因此又可以说ndis.sys是一个函数库。
在NdisMRegisterMiniport之前,需要一个ndis小端口驱动句柄(实际上是一个驱动描述符结构),下面的宏就是用来创建一个小端口驱动句柄的。
/* NdisMInitializeWrapper is an alias for NdisInitializeWrapper. */
#define NdisMInitializeWrapper NdisInitializeWrapper

/*
 * NdisInitializeWrapper - create the miniport driver descriptor (handle).
 *
 * NdisWrapperHandle - receives the new handle; left NULL when an
 *                     allocation fails.
 * SystemSpecific1   - must be the driver's DriverObject.
 * SystemSpecific2   - must be the driver's RegistryPath (PUNICODE_STRING).
 * SystemSpecific3   - unused.
 *
 * Allocates an NDIS_M_DRIVER_BLOCK, stores the driver object and a private
 * NUL-terminated copy of the registry path in it, and links it into the
 * global miniport driver list.
 */
VOID
NdisInitializeWrapper(
    OUT PNDIS_HANDLE NdisWrapperHandle,
    IN PVOID SystemSpecific1,
    IN PVOID SystemSpecific2,
    IN PVOID SystemSpecific3)
{
    PNDIS_M_DRIVER_BLOCK Miniport;
    PUNICODE_STRING RegistryPath;
    WCHAR *RegistryBuffer;

    *NdisWrapperHandle = NULL;

    /* The descriptor doubles as the handle. */
    Miniport = ExAllocatePool(NonPagedPool, sizeof(NDIS_M_DRIVER_BLOCK));
    if (Miniport == NULL)
        return;

    RtlZeroMemory(Miniport, sizeof(NDIS_M_DRIVER_BLOCK));
    KeInitializeSpinLock(&Miniport->Lock);
    Miniport->DriverObject = (PDRIVER_OBJECT)SystemSpecific1;

    RegistryPath = ExAllocatePool(PagedPool, sizeof(UNICODE_STRING));
    if (RegistryPath == NULL)
    {
        ExFreePool(Miniport);
        return;
    }

    /* One extra WCHAR so the copy can be NUL-terminated. */
    RegistryPath->Length = ((PUNICODE_STRING)SystemSpecific2)->Length;
    RegistryPath->MaximumLength = RegistryPath->Length + sizeof(WCHAR);

    RegistryBuffer = ExAllocatePool(PagedPool, RegistryPath->MaximumLength);
    if (RegistryBuffer == NULL)
    {
        ExFreePool(RegistryPath);
        ExFreePool(Miniport);
        return;
    }

    RtlCopyMemory(RegistryBuffer, ((PUNICODE_STRING)SystemSpecific2)->Buffer, RegistryPath->Length);
    RegistryBuffer[RegistryPath->Length / sizeof(WCHAR)] = 0;
    RegistryPath->Buffer = RegistryBuffer;
    Miniport->RegistryPath = RegistryPath; /* service key path of this miniport driver */

    InitializeListHead(&Miniport->DeviceList); /* no adapter devices yet */

    /* Link the driver into the global list (the article's author remarks
     * that NdisMRegisterMiniport would be a more natural place for this). */
    ExInterlockedInsertTailList(&MiniportListHead, &Miniport->ListEntry, &MiniportListLock);

    *NdisWrapperHandle = Miniport;
}
这样,在小端口驱动的描述符中有一个指针指向其驱动对象,而在驱动对象的标准扩展部中也有一个指针指向了小端口驱动的描述符。二者互相指向。
前面说过:ndis内部设置的AddDevice即NdisIAddDevice函数会在内部自动创建一个小端口设备对象,然后加入堆栈。我们看:
/*
 * NdisIAddDevice - AddDevice routine that NDIS installs for every miniport
 * driver (see NdisMRegisterMiniport). It creates the miniport device object
 * for a network card and attaches it to the device stack of the card's PDO.
 *
 * NOTE(review): this listing is abridged. Local declarations are not shown,
 * no returned status is checked, and LinkageKeyBuffer is never freed in the
 * visible code.
 */
NTSTATUS
NdisIAddDevice( // the "I" means internal: used inside ndis.sys, not exported
IN PDRIVER_OBJECT DriverObject,// driver object of the NDIS miniport driver
IN PDEVICE_OBJECT PhysicalDeviceObject)// hardware PDO representing the network card
{
// ClassKeyName has no terminating NUL; LinkageKeyName does.
static const WCHAR ClassKeyName[] = {'C','l','a','s','s','\\'};
static const WCHAR LinkageKeyName[] = {'\\','L','i','n','k','a','g','e',0};
MiniportPtr = IoGetDriverObjectExtension(DriverObject, (PVOID)'NMID');
Miniport = *MiniportPtr;// fetch the miniport driver descriptor
// Query the driver key name of the PDO. The first call passes a zero-length
// buffer so that only DriverKeyLength is filled in.
Status = IoGetDeviceProperty(PhysicalDeviceObject, DevicePropertyDriverKeyName,
0, NULL, &DriverKeyLength);
LinkageKeyBuffer = ExAllocatePool(PagedPool, DriverKeyLength +
sizeof(ClassKeyName) + sizeof(LinkageKeyName));
// The second call writes the driver key name right after the space reserved
// for the "Class\" prefix.
Status = IoGetDeviceProperty(PhysicalDeviceObject, DevicePropertyDriverKeyName,
DriverKeyLength, LinkageKeyBuffer +
(sizeof(ClassKeyName) / sizeof(WCHAR)),&DriverKeyLength);
RtlCopyMemory(LinkageKeyBuffer, ClassKeyName, sizeof(ClassKeyName));
// The "- 1" places "\Linkage" one WCHAR before the end of the driver key
// name; presumably that overwrites its terminating NUL — TODO confirm that
// DriverKeyLength includes the terminator.
RtlCopyMemory(LinkageKeyBuffer + ((sizeof(ClassKeyName) + DriverKeyLength) /
sizeof(WCHAR)) - 1, LinkageKeyName, sizeof(LinkageKeyName));
// LinkageKeyBuffer now holds: 'Class\DriverKeyName\Linkage'
RtlZeroMemory(QueryTable, sizeof(QueryTable));
RtlInitUnicodeString(&ExportName, NULL);
QueryTable[0].Flags = RTL_QUERY_REGISTRY_REQUIRED | RTL_QUERY_REGISTRY_DIRECT;
QueryTable[0].Name = L"Export";
QueryTable[0].EntryContext = &ExportName;
// Read the "Export" value of that key; it is used as the name of the
// miniport device object.
Status = RtlQueryRegistryValues(RTL_REGISTRY_CONTROL, LinkageKeyBuffer,
QueryTable, NULL, NULL);
// Key step: NDIS creates the miniport device object on the driver's behalf.
Status = IoCreateDevice(Miniport->DriverObject, sizeof(LOGICAL_ADAPTER),
&ExportName, FILE_DEVICE_PHYSICAL_NETCARD,0, FALSE, &DeviceObject);
// Key step: the device extension of that object is the standard
// LOGICAL_ADAPTER structure.
Adapter = (PLOGICAL_ADAPTER)DeviceObject->DeviceExtension;
KeInitializeSpinLock(&Adapter->NdisMiniportBlock.Lock);
InitializeListHead(&Adapter->ProtocolListHead);// no protocol bound yet
Status = IoRegisterDeviceInterface(PhysicalDeviceObject,&GUID_DEVINTERFACE_NET,
NULL,&Adapter->NdisMiniportBlock.SymbolicLinkName);
Adapter->NdisMiniportBlock.DriverHandle = Miniport;
Adapter->NdisMiniportBlock.MiniportName = ExportName;// name of the miniport device object
Adapter->NdisMiniportBlock.DeviceObject = DeviceObject;
Adapter->NdisMiniportBlock.PhysicalDeviceObject = PhysicalDeviceObject;// hardware PDO of this card
// Key step: attach the new device object to the PDO's device stack and
// remember the device it was attached to.
Adapter->NdisMiniportBlock.NextDeviceObject =
IoAttachDeviceToDeviceStack(Adapter->NdisMiniportBlock.DeviceObject,PhysicalDeviceObject);
Adapter->NdisMiniportBlock.OldPnPDeviceState = 0;
Adapter->NdisMiniportBlock.PnPDeviceState = NdisPnPDeviceAdded;// device created and attached to the stack
KeInitializeTimer(&Adapter->NdisMiniportBlock.WakeUpDpcTimer.Timer);
KeInitializeDpc(&Adapter->NdisMiniportBlock.WakeUpDpcTimer.Dpc, MiniportHangDpc, Adapter);
DeviceObject->Flags &= ~DO_DEVICE_INITIALIZING;
return STATUS_SUCCESS;
}
总之,Ndis内部托管的AddDevice会为我们自动创建小端口设备,并加入设备栈。但是协议驱动没有AddDevice,因此协议驱动并没有以设备对象的形式挂入网卡的设备栈中。协议驱动与小端口驱动之间断层了,irp最终只能下发到协议驱动这一层,再也传不下去了,所以协议驱动与小端口驱动之间的交互不能使用传统的irp方式,而只能借助ndis框架和回调函数进行通信。
Ndis内部的小端口设备对象的设备扩展结构
/*
 * Standard device extension of the miniport device object that NDIS creates
 * in NdisIAddDevice.
 */
typedef struct _LOGICAL_ADAPTER // standard miniport device extension
{
// Per the article, this member contains the slot for the miniport's own
// (custom) device extension, the adapter context.
NDIS_MINIPORT_BLOCK NdisMiniportBlock;
PNDIS_MINIPORT_WORK_ITEM WorkQueueHead; /* Head of work queue */
PNDIS_MINIPORT_WORK_ITEM WorkQueueTail; /* Tail of work queue */
LIST_ENTRY ListEntry; // links into the global list of miniport devices
LIST_ENTRY MiniportListEntry; // links into the owning driver's list of miniport devices
LIST_ENTRY ProtocolListHead; // all protocol drivers bound to this miniport device
ULONG MediumHeaderSize; // link-layer header length (i.e. link-layer type)
HARDWARE_ADDRESS Address; // physical address (the MAC for Ethernet cards)
ULONG AddressLength; // physical address length (6 bytes for Ethernet cards)
PMINIPORT_BUGCHECK_CONTEXT BugcheckContext;
} LOGICAL_ADAPTER, *PLOGICAL_ADAPTER;
当小端口驱动加载执行了DriverEntry、AddDevice后,系统就会发出一个IRP_MN_START_DEVICE的pnp irp来启动设备。Pnp irp 派遣函数也被ndis托管了,固定为:
NdisIDispatchPnp。我们看他是如何处理pnp irp的
/*
 * NdisIDispatchPnp - IRP_MJ_PNP dispatch routine that NDIS installs for
 * miniport drivers. Only the IRP_MN_START_DEVICE case is shown; the rest of
 * the switch is elided in this listing.
 */
NTSTATUS
NdisIDispatchPnp(IN PDEVICE_OBJECT DeviceObject,PIRP Irp)
{
PIO_STACK_LOCATION Stack = IoGetCurrentIrpStackLocation(Irp);
PLOGICAL_ADAPTER Adapter = (PLOGICAL_ADAPTER)DeviceObject->DeviceExtension;
NTSTATUS Status;
switch (Stack->MinorFunction)
{
case IRP_MN_START_DEVICE:
Status = NdisIForwardIrpAndWait(Adapter, Irp);// forward to the lower drivers and wait for completion
if (NT_SUCCESS(Status) && NT_SUCCESS(Irp->IoStatus.Status))
Status = NdisIPnPStartDevice(DeviceObject, Irp);// perform the generic device start work
Irp->IoStatus.Status = Status;
IoCompleteRequest(Irp, IO_NO_INCREMENT);
break;
…
}
return Status;
}
实际的设备启动工作由NdisIPnPStartDevice完成。这是一个通用函数,用来完成一些通用的ndis网卡设备的启动工作。
/*
 * NdisIPnPStartDevice - generic start-up work for an NDIS adapter device,
 * called from NdisIDispatchPnp for IRP_MN_START_DEVICE. This listing is
 * heavily abridged ("…" marks elided code).
 */
NTSTATUS NdisIPnPStartDevice(IN PDEVICE_OBJECT DeviceObject,PIRP Irp)
{ …
// Add the adapter to the global adapter list.
ExInterlockedInsertTailList(&AdapterListHead, &Adapter->ListEntry, &AdapterListLock);
// Key step: call the InitializeHandler the miniport registered, so the
// driver can do its own initialisation.
NdisStatus = (*Adapter->NdisMiniportBlock.DriverHandle->MiniportCharacteristics.InitializeHandler)
(
&OpenErrorStatus, &SelectedMediumIndex, &MediaArray[0],
MEDIA_ARRAY_SIZE, Adapter, (NDIS_HANDLE)&WrapperContext
);
…
Adapter->NdisMiniportBlock.MediaType = MediaArray[SelectedMediumIndex];// record the medium type the miniport selected
…
// Add the adapter to the adapter list of its owning miniport driver.
ExInterlockedInsertTailList(&Adapter->NdisMiniportBlock.DriverHandle->DeviceList, &Adapter->MiniportListEntry, &Adapter->NdisMiniportBlock.DriverHandle->Lock);
// Key step: once the new adapter is initialised, run the bind step for
// every registered protocol driver.
CurrentEntry = ProtocolListHead.Flink;
while (CurrentEntry != &ProtocolListHead)
{
ProtocolBinding = CONTAINING_RECORD(CurrentEntry, PROTOCOL_BINDING, ListEntry);
ndisBindMiniportsToProtocol(&NdisStatus, ProtocolBinding);
CurrentEntry = CurrentEntry->Flink;
}
…
}
小端口驱动都必须提供一个自定义的启动初始化回调函数,用来在网卡启动时执行某些初始化工作。下面是一个典型的启动初始化函数,我们看看一般要做哪些初始化工作。(注意下文所说的示例函数都来自于ne2000.sys这个通用的以太网卡小端口驱动)
//示例函数(这个示例回调函数做了初始化硬件、注册中断向量 等工作)
/*
 * MiniportInitialize - sample InitializeHandler (per the article, taken from
 * the ne2000 miniport driver).
 *
 * OpenErrorStatus             - not written by the visible code.
 * SelectedMediumIndex         - receives the index of NdisMedium802_3 in
 *                               MediumArray.
 * MediumArray/MediumArraySize - candidate media offered by NDIS.
 * MiniportAdapterHandle       - NDIS handle for this adapter (the standard
 *                               miniport device extension).
 * WrapperConfigurationContext - context used to query resources and to open
 *                               the adapter's configuration key.
 *
 * Selects the medium, allocates the driver's own adapter context, works out
 * the I/O port and IRQ, registers the port range and the interrupt,
 * programs the card and registers a shutdown handler.
 * The "。。。" markers are error paths and code elided by the article.
 */
NDIS_STATUS NTAPI MiniportInitialize(
    OUT PNDIS_STATUS OpenErrorStatus,
    OUT PUINT SelectedMediumIndex,
    IN PNDIS_MEDIUM MediumArray,
    IN UINT MediumArraySize,
    IN NDIS_HANDLE MiniportAdapterHandle,
    IN NDIS_HANDLE WrapperConfigurationContext)
{
    PNIC_ADAPTER Adapter;
    NDIS_HANDLE ConfigurationHandle;
    NDIS_STATUS Status;
    UINT i;
    /* The configured network address is a byte array, so it must be indexed
     * byte by byte; a UINT pointer would stride four bytes per element. */
    UCHAR *RegNetworkAddress = NULL;
    UINT RegNetworkAddressLength = 0;

    /* This driver only supports Ethernet (802.3). */
    for (i = 0; i < MediumArraySize; i++)
    {
        if (MediumArray[i] == NdisMedium802_3)
            break;
    }
    if (i == MediumArraySize)
        return NDIS_STATUS_UNSUPPORTED_MEDIA;
    *SelectedMediumIndex = i;

    /* Allocate the driver's own adapter context (custom device extension). */
    Status = NdisAllocateMemory((PVOID *)&Adapter, sizeof(NIC_ADAPTER), 0, HighestAcceptableMax);
    if (Status != NDIS_STATUS_SUCCESS)
        return Status; /* nothing to zero or fill in without memory */
    NdisZeroMemory(Adapter, sizeof(NIC_ADAPTER));
    Adapter->MiniportAdapterHandle = MiniportAdapterHandle; /* remember the standard extension */

    /* Default resource configuration. */
    Adapter->IoBaseAddress = DRIVER_DEFAULT_IO_BASE_ADDRESS; /* port base address */
    Adapter->InterruptLevel = DRIVER_DEFAULT_INTERRUPT_NUMBER;
    Adapter->InterruptVector = DRIVER_DEFAULT_INTERRUPT_NUMBER;
    Adapter->InterruptShared = DRIVER_DEFAULT_INTERRUPT_SHARED;
    Adapter->InterruptMode = DRIVER_DEFAULT_INTERRUPT_MODE;
    Adapter->MaxMulticastListSize = DRIVER_MAX_MULTICAST_LIST_SIZE;
    Adapter->InterruptMask = DRIVER_INTERRUPT_MASK;
    Adapter->LookaheadSize = DRIVER_MAXIMUM_LOOKAHEAD; /* size of the payload lookahead area */

    /* Ask which resources (IRQ, port) the system assigned to this card. */
    MiQueryResources(&Status, Adapter, WrapperConfigurationContext);

    /* If that failed, fall back to values stored in the registry. */
    if (Status != NDIS_STATUS_SUCCESS)
    {
        PNDIS_CONFIGURATION_PARAMETER ConfigurationParameter;
        UNICODE_STRING Keyword;

        NdisOpenConfiguration(&Status, &ConfigurationHandle, WrapperConfigurationContext);
        if (Status == NDIS_STATUS_SUCCESS)
        {
            /* IRQ */
            NdisInitUnicodeString(&Keyword, L"Irq");
            NdisReadConfiguration(&Status, &ConfigurationParameter, ConfigurationHandle, &Keyword, NdisParameterHexInteger);
            if (Status == NDIS_STATUS_SUCCESS)
            {
                Adapter->InterruptLevel =
                Adapter->InterruptVector = ConfigurationParameter->ParameterData.IntegerData;
            }

            /* I/O port */
            NdisInitUnicodeString(&Keyword, L"Port");
            NdisReadConfiguration(&Status, &ConfigurationParameter, ConfigurationHandle, &Keyword, NdisParameterHexInteger);
            if (Status == NDIS_STATUS_SUCCESS)
                Adapter->IoBaseAddress = ConfigurationParameter->ParameterData.IntegerData;

            NdisCloseConfiguration(ConfigurationHandle);
        }
    }

    /* Key step: store the custom adapter context inside the standard extension. */
    NdisMSetAttributes(MiniportAdapterHandle,
                       (NDIS_HANDLE)Adapter,
                       FALSE, NdisInterfaceIsa);

    Status = NdisMRegisterIoPortRange(&Adapter->IOBase, MiniportAdapterHandle,
                                      Adapter->IoBaseAddress, 0x20);
    if (Status != NDIS_STATUS_SUCCESS) 。。。
    Adapter->IOPortRangeRegistered = TRUE;

#ifndef NOCARD
    Status = NICInitialize(Adapter); /* initialise the card's hardware registers */
    if (Status != NDIS_STATUS_SUCCESS) 。。。

    NdisOpenConfiguration(&Status, &ConfigurationHandle, WrapperConfigurationContext);
    if (Status == NDIS_STATUS_SUCCESS)
    {
        /* A software-configured MAC address in the registry takes precedence. */
        NdisReadNetworkAddress(&Status, (PVOID *)&RegNetworkAddress, &RegNetworkAddressLength, ConfigurationHandle);
        if (Status == NDIS_STATUS_SUCCESS && RegNetworkAddressLength == 6)
        {
            for (i = 0; i < 6; i++)
                Adapter->StationAddress[i] = RegNetworkAddress[i];
        }
        NdisCloseConfiguration(ConfigurationHandle);
    }
    if (Status != NDIS_STATUS_SUCCESS || RegNetworkAddressLength != 6)
    {
        /* Otherwise use the card's permanent MAC address. */
        for (i = 0; i < 6; i++)
            Adapter->StationAddress[i] = Adapter->PermanentAddress[i];
    }
    。。。
    NICSetup(Adapter); /* program the card's hardware registers */
#endif

    /* Register the interrupt. */
    Status = NdisMRegisterInterrupt(&Adapter->Interrupt, MiniportAdapterHandle,
                                    Adapter->InterruptVector, Adapter->InterruptLevel, FALSE,
                                    Adapter->InterruptShared, Adapter->InterruptMode);
    if (Status != NDIS_STATUS_SUCCESS) 。。。
    Adapter->InterruptRegistered = TRUE;

#ifndef NOCARD
    NICStart(Adapter); /* program the card's hardware registers */
#endif

    NdisMRegisterAdapterShutdownHandler(MiniportAdapterHandle, Adapter, MiniportShutdown);
    Adapter->ShutdownHandlerRegistered = TRUE;

    InsertTailList(&DriverInfo.AdapterListHead, &Adapter->ListEntry);
    return NDIS_STATUS_SUCCESS;
}
如上,可以看出,一块网卡的启动初始化工作是比较复杂的。上面的示例函数分配了一个自定义的小端口设备对象扩展,初始化网卡内部的硬件寄存器,注册中断向量,最后返回网卡的介质类型告诉给ndis框架(前3项工作是可选的,最后的告知工作是必须的)
事实上,一般的网卡在启动初始化时都要做这些工作:【硬件、注断、自扩展】
硬件:指初始化硬件
注断:注册中断isr
自扩展:在标准小端口设备扩展之外再另行分配一个自定义设备扩展
题外话:
为什么要分配一个自定义设备扩展呢? 我们知道,ndis内部提供托管的AddDevice会为我们自动创建一个小端口设备对象,而这个设备对象的设备扩展是ndis内部预置的一个结构。以往我们手动调用IoCreateDevice时都是自己定义的设备扩展来保存自定义信息,但现在被ndis托管了,如果我们希望仍旧保存一些自定义信息怎么办?Ndis框架不傻,那个预置的小端口设备扩展内部就提供了一个字段(即适配器上下文),用来存放用户自定义的设备扩展。用户只需分配一个设备扩展,然后调用NdisMSetAttributes设置一下即可。
到时候ndis调用我们的回调函数时,会传入这个自定义设备扩展的。
如上,我们说了,在网卡启动初始化阶段,一般需要注册一个中断向量,下面的函数就是干这个的。
/*
 * NdisMRegisterInterrupt - connect an adapter's interrupt.
 *
 * Interrupt             - caller-provided interrupt block, initialised here.
 * MiniportAdapterHandle - the adapter's LOGICAL_ADAPTER.
 * InterruptVector/InterruptLevel - bus-relative vector and level.
 * RequestIsr            - stored as IsrRequested; ServiceRoutine uses it to
 *                         decide whether to call the miniport's ISRHandler.
 * SharedInterrupt       - whether the interrupt may be shared.
 * InterruptMode         - interrupt mode passed through to IoConnectInterrupt.
 *
 * The ISR connected is always NDIS's own ServiceRoutine and the DPC is
 * HandleDeferredProcessing. Returns NDIS_STATUS_SUCCESS or
 * NDIS_STATUS_FAILURE.
 */
NDIS_STATUS
NdisMRegisterInterrupt(
    OUT PNDIS_MINIPORT_INTERRUPT Interrupt,
    IN NDIS_HANDLE MiniportAdapterHandle,
    IN UINT InterruptVector,
    IN UINT InterruptLevel,
    IN BOOLEAN RequestIsr,
    IN BOOLEAN SharedInterrupt,
    IN NDIS_INTERRUPT_MODE InterruptMode)
{
    PLOGICAL_ADAPTER Adapter = (PLOGICAL_ADAPTER)MiniportAdapterHandle;
    KAFFINITY CpuAffinity;
    KIRQL DeviceIrql;
    ULONG SystemVector;
    NTSTATUS ConnectStatus;

    /* Start from a clean interrupt block and fill in the bookkeeping fields. */
    RtlZeroMemory(Interrupt, sizeof(NDIS_MINIPORT_INTERRUPT));
    KeInitializeSpinLock(&Interrupt->DpcCountLock);
    KeInitializeDpc(&Interrupt->InterruptDpc, HandleDeferredProcessing, Adapter);
    KeInitializeEvent(&Interrupt->DpcsCompletedEvent, NotificationEvent, FALSE);
    Interrupt->SharedInterrupt = SharedInterrupt;
    Interrupt->IsrRequested = RequestIsr;
    Interrupt->Miniport = &Adapter->NdisMiniportBlock;

    /* Translate the bus-relative level/vector into a system vector. */
    SystemVector = HalGetInterruptVector(Adapter->NdisMiniportBlock.BusType,
                                         Adapter->NdisMiniportBlock.BusNumber,
                                         InterruptLevel, InterruptVector,
                                         &DeviceIrql, &CpuAffinity);

    /* Connect NDIS's ServiceRoutine with the interrupt block as its context. */
    ConnectStatus = IoConnectInterrupt(&Interrupt->InterruptObject, ServiceRoutine,
                                       Interrupt, &Interrupt->DpcCountLock,
                                       SystemVector, DeviceIrql, DeviceIrql,
                                       InterruptMode, SharedInterrupt,
                                       CpuAffinity, FALSE);
    if (!NT_SUCCESS(ConnectStatus))
        return NDIS_STATUS_FAILURE;

    Adapter->NdisMiniportBlock.Interrupt = Interrupt;
    Adapter->NdisMiniportBlock.RegisteredInterrupts++;
    return NDIS_STATUS_SUCCESS;
}
这样,一旦有中断发生,就会进入ServiceRoutine这个isr。这个isr是ndis内部自己提供的,我们看它做了什么
/*
 * ServiceRoutine - the ISR that NDIS connects for every miniport interrupt.
 *
 * Interrupt      - kernel interrupt object (not used here).
 * ServiceContext - the NDIS_MINIPORT_INTERRUPT given to IoConnectInterrupt.
 *
 * If the miniport asked for its ISR, that ISR decides whether the interrupt
 * is recognised and whether a DPC is needed. Otherwise, if the miniport has
 * a DisableInterruptHandler, it is called and the interrupt is treated as
 * recognised with a DPC queued. Returns whether the interrupt was
 * recognised.
 */
BOOLEAN ServiceRoutine(IN PKINTERRUPT Interrupt, IN PVOID ServiceContext)
{
    PNDIS_MINIPORT_INTERRUPT NdisIntr = ServiceContext;
    PNDIS_MINIPORT_BLOCK Block = NdisIntr->Miniport;
    BOOLEAN Claimed = FALSE;
    BOOLEAN NeedDpc = FALSE;

    if (NdisIntr->IsrRequested)
    {
        /* Call the ISR recorded in the miniport characteristics. */
        (*Block->DriverHandle->MiniportCharacteristics.ISRHandler)(
            &Claimed,
            &NeedDpc, /* set by the miniport when the deferred part must run */
            Block->MiniportAdapterContext);
    }
    else if (Block->DriverHandle->MiniportCharacteristics.DisableInterruptHandler)
    {
        (*Block->DriverHandle->MiniportCharacteristics.DisableInterruptHandler)(
            Block->MiniportAdapterContext);
        Claimed = TRUE;
        NeedDpc = TRUE;
    }

    /* Queue HandleDeferredProcessing, the deferred part of the ISR. */
    if (NeedDpc)
        KeInsertQueueDpc(&NdisIntr->InterruptDpc, NULL, NULL);

    return Claimed;
}
//我们的isr。(这是一个示例函数)
/*
 * MiniportISR - sample miniport ISR.
 *
 * InterruptRecognized          - set to TRUE: the interrupt is claimed.
 * QueueMiniportHandleInterrupt - set to TRUE: request the deferred handler.
 * MiniportAdapterContext       - the driver's NIC_ADAPTER.
 *
 * Masks further interrupts from this card (not the same as a CPU-wide
 * "cli") and defers the real work to the DPC.
 */
VOID NTAPI MiniportISR(
    OUT PBOOLEAN InterruptRecognized,
    OUT PBOOLEAN QueueMiniportHandleInterrupt,
    IN NDIS_HANDLE MiniportAdapterContext)
{
    PNIC_ADAPTER Nic = (PNIC_ADAPTER)MiniportAdapterContext;

    NICDisableInterrupts(Nic);

    *InterruptRecognized = TRUE;
    *QueueMiniportHandleInterrupt = TRUE;
}
如上,我们编写的这个示例isr很简单,它仅仅暂时屏蔽来自这个网卡的后续中断,然后,QueueMiniportHandleInterrupt置为TRUE,表示将实质的中断处理工作纳入到DPC中去执行。由于DPC都是在开中断的条件下执行的,所以必须先屏蔽掉来自同一网卡的其它后续中断,防止嵌套。而这个DPC就是ndis内部自己提供的下面函数,我们看它做了什么工作。
/*
 * HandleDeferredProcessing - DPC routine that NDIS queues from
 * ServiceRoutine; it runs the deferred part of interrupt handling.
 *
 * Dpc              - the DPC object (unused).
 * DeferredContext  - context given to KeInitializeDpc, converted to the
 *                    adapter via GET_LOGICAL_ADAPTER.
 * SystemArgument1/2 - unused.
 */
VOID HandleDeferredProcessing(
    IN PKDPC Dpc,
    IN PVOID DeferredContext,
    IN PVOID SystemArgument1,
    IN PVOID SystemArgument2)
{
    PLOGICAL_ADAPTER Adapter = GET_LOGICAL_ADAPTER(DeferredContext);

    /* Key step: call the HandleInterruptHandler the miniport registered. */
    (*Adapter->NdisMiniportBlock.DriverHandle->MiniportCharacteristics.HandleInterruptHandler)(
        Adapter->NdisMiniportBlock.MiniportAdapterContext);

    /* The deferred work is done; let the miniport re-enable the card's
     * interrupts if it provided a handler for that. */
    if (Adapter->NdisMiniportBlock.DriverHandle->MiniportCharacteristics.EnableInterruptHandler)
        (*Adapter->NdisMiniportBlock.DriverHandle->MiniportCharacteristics.EnableInterruptHandler)(
            Adapter->NdisMiniportBlock.MiniportAdapterContext);
}
如上,用户自己注册小端口时提供的HandleInterruptHandler例程其实就是我们isr的后半部。当ndis框架回调执行了isr的后半部后,所有中断处理工作都处理完毕了,然后就可以开启来自这个网卡的后续中断了,也即撤销屏蔽。
下面就是一个示例HandleInterruptHandler,来自于ne2000驱动,我们看看那个驱动的isr后半部工作到底做了什么。
/*
 * MiniportHandleInterrupt - sample HandleInterruptHandler (per the article,
 * from the ne2000 driver): the deferred part of interrupt handling.
 *
 * Reads the card's interrupt status register (PG0_ISR), accumulates the
 * pending bits in Adapter->InterruptStatus and handles them one bit at a
 * time, scanning from the lowest bit upwards. It stops when no bit is
 * pending or after INTERRUPT_LIMIT iterations.
 */
VOID NTAPI MiniportHandleInterrupt(IN NDIS_HANDLE MiniportAdapterContext)// the driver's custom device extension
{
UCHAR ISRValue;
UCHAR ISRMask;
UCHAR Mask;
PNIC_ADAPTER Adapter = (PNIC_ADAPTER)MiniportAdapterContext;
UINT i = 0;
ISRMask = Adapter->InterruptMask;// typically 0xFF according to the original author
// The card has an interrupt status register, PG0_ISR.
NdisRawReadPortUchar(Adapter->IOBase + PG0_ISR, &ISRValue);// read the card's current status
Adapter->InterruptStatus |= (ISRValue & ISRMask);
Mask = 0x01;// Mask is the bit-position mask being examined
while (Adapter->InterruptStatus != 0x00 && i++ < INTERRUPT_LIMIT)
{
if (ISRValue != 0x00) {
// Write the value just read back to PG0_ISR (presumably this
// acknowledges those bits — TODO confirm against the chip datasheet).
NdisRawWritePortUchar(Adapter->IOBase + PG0_ISR, ISRValue);
Mask = 0x01;// new status arrived: restart the scan from the lowest bit
}
// Scan upwards for the next pending bit.
while (((Adapter->InterruptStatus & Mask) == 0) && (Mask < ISRMask))
Mask = (Mask << 1);
switch (Adapter->InterruptStatus & Mask)
{
case ISR_OVW:// raised when the chip's receive buffer overflows
Adapter->BufferOverflow = TRUE;
if(Adapter->MiniportAdapterHandle)
HandleReceive(Adapter); // drain all frames and indicate them to the upper layer
Adapter->InterruptStatus &= ~ISR_OVW;
break;
case ISR_RXE:// raised when a frame was received with errors
NICUpdateCounters(Adapter);
Adapter->ReceiveError = TRUE;
// falls through to the receive handling below
case ISR_PRX:// raised when the chip has received an Ethernet frame
if(Adapter->MiniportAdapterHandle)
HandleReceive(Adapter); // drain all frames and indicate them to the upper layer
Adapter->InterruptStatus &= ~(ISR_PRX | ISR_RXE);
break;
case ISR_TXE:// raised when transmitting a frame failed
NICUpdateCounters(Adapter);
Adapter->TransmitError = TRUE;
// falls through to the transmit handling below
case ISR_PTX:// raised when the chip's transmit buffer becomes empty
HandleTransmit(Adapter);
Adapter->InterruptStatus &= ~(ISR_PTX | ISR_TXE);
break;
case ISR_CNT:// raised when one of the chip's counters overflows
NICUpdateCounters(Adapter);
Adapter->InterruptStatus &= ~ISR_CNT;
break;
default:
// Bit not handled above: just clear it.
Adapter->InterruptStatus &= ~Mask;
break;
}
Mask = (Mask << 1);
NdisRawReadPortUchar(Adapter->IOBase + PG0_ISR, &ISRValue);
Adapter->InterruptStatus |= (ISRValue & ISRMask);// the status may have changed meanwhile; merge in the latest bits
}
}
如上,网卡的物理状态发生上述变化时,都会触发一次中断,同时记录在状态寄存器对应的位。
我们的isr每得到一次中断时,都要扫描状态寄存器中的所有状态位,一一处理(因为我们在处理中断时,屏蔽了来自这个网卡的中断,因此会造成中断累积。所以必须在每次中断的处理函数中处理所有可能发生的状态)
网卡芯片中有一个硬件发送缓冲区和一个硬件接收缓冲区。当从网络电缆来到一帧时,就会存放到芯片内部的接收缓冲区。芯片会将内部的发送缓冲区中的帧注入到电缆上,这个过程也比较费时。芯片与电缆的数据交换速度受网卡制造工艺限制,一般的网卡也不过是百兆、千兆带宽而已。即使交换速度慢,但是如果应用程序收帧的速度没有帧从网络电缆抵达网卡的速度快的话,网卡内部的接收缓冲区就会逐渐变满而溢出,从而导致触发中断。同理,当网卡终于把内部发送缓冲区中的数据发出到网络电缆后,发送缓冲区变成空闲时,也会触发中断。
Ne2000以太网卡的小端口驱动提供了HandleReceive这个函数,用于从网卡内部的接收缓冲区中读出所有帧,提交给上层(实际上是提交给绑定着这块网卡的所有协议驱动)。HandleReceive这个函数内部使用了NdisMEthIndicateReceive宏完成提交工作。这个宏实际上调用了下面的函数来做提交工作
/*
 * EthFilterDprIndicateReceive - what the NdisMEthIndicateReceive macro ends
 * up calling; a thin wrapper that forwards the receive indication to
 * MiniIndicateData for the adapter behind the filter.
 *
 * Filter  - Ethernet filter; its Miniport member identifies the adapter.
 * Address - not used by this wrapper.
 * The remaining parameters are passed through unchanged.
 */
VOID
EthFilterDprIndicateReceive(
    IN PETH_FILTER Filter,
    IN NDIS_HANDLE MacReceiveContext,
    IN PCHAR Address,
    IN PVOID HeaderBuffer,
    IN UINT HeaderBufferSize,
    IN PVOID LookaheadBuffer,
    IN UINT LookaheadBufferSize,
    IN UINT PacketSize)
{
    PLOGICAL_ADAPTER TargetAdapter =
        (PLOGICAL_ADAPTER)((PETHI_FILTER)Filter)->Miniport;

    MiniIndicateData(TargetAdapter,
                     MacReceiveContext,
                     HeaderBuffer,
                     HeaderBufferSize,
                     LookaheadBuffer,
                     LookaheadBufferSize,
                     PacketSize);
}
/*
 * MiniIndicateData - indicate a (possibly partial) received frame to every
 * protocol bound to an adapter.
 *
 * Adapter             - standard device extension of the target adapter.
 * MacReceiveContext   - passed through to each protocol's ReceiveHandler.
 * HeaderBuffer        - frame header.
 * HeaderBufferSize    - header length.
 * LookaheadBuffer     - first bytes of the payload (the lookahead area).
 * LookaheadBufferSize - lookahead length.
 * PacketSize          - total payload length.
 *
 * The adapter's binding list is walked while holding the miniport block
 * spin lock.
 */
VOID
MiniIndicateData(
    PLOGICAL_ADAPTER Adapter,
    NDIS_HANDLE MacReceiveContext,
    PVOID HeaderBuffer,
    UINT HeaderBufferSize,
    PVOID LookaheadBuffer,
    UINT LookaheadBufferSize,
    UINT PacketSize)
{
    PADAPTER_BINDING Binding;
    PLIST_ENTRY Link;
    KIRQL SavedIrql;

    MiniDisplayPacket2(HeaderBuffer, HeaderBufferSize, LookaheadBuffer, LookaheadBufferSize);

    KeAcquireSpinLock(&Adapter->NdisMiniportBlock.Lock, &SavedIrql);

    /* Visit every protocol bound to this adapter and call its receive handler. */
    for (Link = Adapter->ProtocolListHead.Flink;
         Link != &Adapter->ProtocolListHead;
         Link = Link->Flink)
    {
        Binding = CONTAINING_RECORD(Link, ADAPTER_BINDING, AdapterListEntry);

        (*Binding->ProtocolBinding->Chars.ReceiveHandler)(
            Binding->NdisOpenBlock.ProtocolBindingContext,
            MacReceiveContext,
            HeaderBuffer,
            HeaderBufferSize,
            LookaheadBuffer,
            LookaheadBufferSize,
            PacketSize);
    }

    KeReleaseSpinLock(&Adapter->NdisMiniportBlock.Lock, SavedIrql);
}
如上,这个函数用来向上层绑定的所有协议提交帧(可能不是完整的帧)
前面说过了,当一个新的网卡插入机器时,ndis框架会通知所有现有的协议驱动进行绑定。当一个新的协议驱动安装加载到系统时,ndis框架也会让这个协议绑定现有的所有网卡(协议驱动与网卡之间必须绑定后才能通信)。当ndis框架通知协议驱动进行绑定时,会调用各协议驱动注册的绑定回调函数,在这个函数中,我们应该调用NdisOpenAdapter打开那个新插入到系统的网卡,进行绑定。
/*
 * NdisOpenAdapter - bind a protocol to an adapter.
 *
 * Status                 - receives the result.
 * OpenErrorStatus        - not written by this implementation.
 * NdisBindingHandle      - receives the binding handle (the ADAPTER_BINDING
 *                          created here, i.e. the standard binding context).
 * SelectedMediumIndex    - receives the index in MediumArray that matches
 *                          the adapter's medium type.
 * MediumArray/MediumArraySize - media the protocol supports.
 * NdisProtocolHandle     - handle returned by NdisRegisterProtocol.
 * ProtocolBindingContext - the protocol's own per-binding context; handed
 *                          back to the protocol in its callbacks.
 * AdapterName            - name of the miniport device object to open.
 * OpenOptions, AddressingInformation - unused.
 *
 * Fails with NDIS_STATUS_ADAPTER_NOT_FOUND when no adapter has that name,
 * NDIS_STATUS_UNSUPPORTED_MEDIA when the protocol does not support the
 * adapter's medium, and NDIS_STATUS_RESOURCES when memory runs out.
 */
VOID
NdisOpenAdapter(
    OUT PNDIS_STATUS Status,
    OUT PNDIS_STATUS OpenErrorStatus,
    OUT PNDIS_HANDLE NdisBindingHandle,
    OUT PUINT SelectedMediumIndex,
    IN PNDIS_MEDIUM MediumArray,
    IN UINT MediumArraySize,
    IN NDIS_HANDLE NdisProtocolHandle,
    IN NDIS_HANDLE ProtocolBindingContext,
    IN PNDIS_STRING AdapterName,
    IN UINT OpenOptions,
    IN PSTRING AddressingInformation OPTIONAL)
{
    UINT i;
    BOOLEAN Found;
    PLOGICAL_ADAPTER Adapter;
    PADAPTER_BINDING AdapterBinding;
    PPROTOCOL_BINDING Protocol = GET_PROTOCOL_BINDING(NdisProtocolHandle);

    /* Locate the target adapter by name. */
    Adapter = MiniLocateDevice(AdapterName);
    if (Adapter == NULL)
    {
        *Status = NDIS_STATUS_ADAPTER_NOT_FOUND;
        return;
    }

    /* A protocol may support several media (the article cites Ethernet,
     * Token Ring, FDDI and ATM for tcpip); pick the entry that matches this
     * adapter. */
    Found = FALSE;
    for (i = 0; i < MediumArraySize; i++)
    {
        if (Adapter->NdisMiniportBlock.MediaType == MediumArray[i])
        {
            *SelectedMediumIndex = i;
            Found = TRUE;
            break;
        }
    }
    if (!Found) /* the protocol does not support this adapter's medium */
    {
        *Status = NDIS_STATUS_UNSUPPORTED_MEDIA;
        return;
    }

    /* Allocate the standard binding context (the binding handle). */
    AdapterBinding = ExAllocatePool(NonPagedPool, sizeof(ADAPTER_BINDING));
    if (AdapterBinding == NULL)
    {
        *Status = NDIS_STATUS_RESOURCES;
        return;
    }
    RtlZeroMemory(AdapterBinding, sizeof(ADAPTER_BINDING));

    /* Record who is bound to whom. */
    AdapterBinding->ProtocolBinding = Protocol;
    AdapterBinding->Adapter = Adapter;

    /* Key step: keep the protocol's own context inside the standard one. */
    AdapterBinding->NdisOpenBlock.ProtocolBindingContext = ProtocolBindingContext;
    AdapterBinding->NdisOpenBlock.BindingHandle = (NDIS_HANDLE)AdapterBinding;

    /* The Pro* routines are NDIS-internal; some NDIS macros go through them. */
    AdapterBinding->NdisOpenBlock.RequestHandler = ProRequest;
    AdapterBinding->NdisOpenBlock.ResetHandler = ProReset;
    AdapterBinding->NdisOpenBlock.SendHandler = ProSend;
    AdapterBinding->NdisOpenBlock.SendPacketsHandler = ProSendPackets;
    AdapterBinding->NdisOpenBlock.TransferDataHandler = ProTransferData;
    AdapterBinding->NdisOpenBlock.RequestCompleteHandler =
        Protocol->Chars.RequestCompleteHandler;

    /* Link the binding into both the protocol's and the adapter's list. */
    ExInterlockedInsertTailList(&Protocol->AdapterListHead, &AdapterBinding->ProtocolListEntry, &Protocol->Lock);
    ExInterlockedInsertTailList(&Adapter->ProtocolListHead, &AdapterBinding->AdapterListEntry, &Adapter->NdisMiniportBlock.Lock);

    *NdisBindingHandle = (NDIS_HANDLE)AdapterBinding;
    *Status = NDIS_STATUS_SUCCESS;
}
TCP/IP、IPX/SPX都是协议驱动,如今的时代,tcpip占据了市场主导地位,我们看下这个协议驱动(tcpip.sys)的部分实现
/*
 * DriverEntry - initialization entry point of the tcpip protocol driver.
 *
 * Creates the Ip, RawIp, Udp and Tcp device objects, sets up the driver's
 * lists, pools and locks, starts each protocol layer, installs the dispatch
 * routines, registers the loopback adapter and the NDIS protocol, and arms
 * the periodic IP timer.
 *
 * DriverObject  driver object whose dispatch table is filled in
 * RegistryPath  registry path, passed on to IPStartup
 *
 * Returns STATUS_SUCCESS; intermediate statuses are stored in Status but not
 * acted upon in this excerpt.
 */
NTSTATUS DriverEntry(PDRIVER_OBJECT DriverObject,PUNICODE_STRING RegistryPath)
{
    NTSTATUS Status;
    /* Wide string literals must use plain ASCII double quotes. */
    UNICODE_STRING strIpDeviceName = RTL_CONSTANT_STRING(L"\\Device\\Ip");
    UNICODE_STRING strRawDeviceName = RTL_CONSTANT_STRING(L"\\Device\\RawIp");
    UNICODE_STRING strUdpDeviceName = RTL_CONSTANT_STRING(L"\\Device\\Udp");
    UNICODE_STRING strTcpDeviceName = RTL_CONSTANT_STRING(L"\\Device\\Tcp");
    UNICODE_STRING strNdisDeviceName = RTL_CONSTANT_STRING(L"Tcpip");
    NDIS_STATUS NdisStatus;
    LARGE_INTEGER DueTime;

    KeInitializeDpc(&IPTimeoutDpc, IPTimeoutDpcFn, NULL);
    KeInitializeTimer(&IPTimer);

    /* Create IP device object */
    Status = IoCreateDevice(DriverObject, 0, &strIpDeviceName,
                            FILE_DEVICE_NETWORK, 0, FALSE, &IPDeviceObject);
    ChewInit( IPDeviceObject );

    /* Create RawIP device object */
    Status = IoCreateDevice(DriverObject, 0, &strRawDeviceName,
                            FILE_DEVICE_NETWORK, 0, FALSE, &RawIPDeviceObject);

    /* Create UDP device object */
    Status = IoCreateDevice(DriverObject, 0, &strUdpDeviceName,
                            FILE_DEVICE_NETWORK, 0, FALSE, &UDPDeviceObject);

    /* Create TCP device object */
    Status = IoCreateDevice(DriverObject, 0, &strTcpDeviceName,
                            FILE_DEVICE_NETWORK, 0, FALSE, &TCPDeviceObject);

    /* Setup network layer and transport layer entities */
    KeInitializeSpinLock(&EntityListLock);
    /* NOTE(review): the call below passes no pool tag; confirm against the real source. */
    EntityList = ExAllocatePoolWithTag(NonPagedPool,sizeof(TDIEntityID) * MAX_TDI_ENTITIES);
    EntityCount = 0;
    EntityMax = MAX_TDI_ENTITIES;

    /* Allocate the global packet-descriptor pool */
    NdisAllocatePacketPool(&NdisStatus, &GlobalPacketPool, 100, sizeof(PACKET_CONTEXT));
    /* Allocate the global buffer-descriptor pool */
    NdisAllocateBufferPool(&NdisStatus, &GlobalBufferPool, 100);

    /* Address file object list */
    InitializeListHead(&AddressFileListHead);
    KeInitializeSpinLock(&AddressFileListLock);

    /* Connection endpoint list */
    InitializeListHead(&ConnectionEndpointListHead);
    KeInitializeSpinLock(&ConnectionEndpointListLock);

    /* List of adapters (interfaces) bound to this protocol */
    InitializeListHead(&InterfaceListHead);
    KeInitializeSpinLock(&InterfaceListLock);

    /* Start the network layer, then each protocol on top of it */
    IPStartup(RegistryPath);
    RawIPStartup();
    UDPStartup();
    TCPStartup();
    ICMPStartup();

    /* Every protocol device uses direct (MDL based) I/O */
    IPDeviceObject->Flags |= DO_DIRECT_IO;
    RawIPDeviceObject->Flags |= DO_DIRECT_IO;
    UDPDeviceObject->Flags |= DO_DIRECT_IO;
    TCPDeviceObject->Flags |= DO_DIRECT_IO;

    DriverObject->MajorFunction[IRP_MJ_CREATE] = TiDispatchOpenClose;
    DriverObject->MajorFunction[IRP_MJ_CLOSE] = TiDispatchOpenClose;
    DriverObject->MajorFunction[IRP_MJ_INTERNAL_DEVICE_CONTROL] = TiDispatchInternal;
    DriverObject->MajorFunction[IRP_MJ_DEVICE_CONTROL] = TiDispatch;
    DriverObject->DriverUnload = TiUnload;

    /* Register the loopback adapter */
    Status = LoopRegisterAdapter(NULL, NULL);
    /* Key step: register this driver's protocol characteristics with NDIS */
    Status = LANRegisterProtocol(&strNdisDeviceName);

    /* Arm the periodic IP timer; a negative due time is relative */
    DueTime.QuadPart = -(LONGLONG)IP_TIMEOUT * 10000;
    KeSetTimerEx(&IPTimer, DueTime, IP_TIMEOUT, &IPTimeoutDpc);
    return STATUS_SUCCESS;
}
如上,这个协议驱动内部会创建一个网络层设备对象和三个传输层设备对象(RawIp也是传输层),这样,应用程序就可以直接打开这些设备,收发报文(不过,很少有应用程序这样做,一般都是通过socket间接打开这些设备进行通信的)。
下面的函数用来将tcpip.sys注册为一个协议驱动
/*
 * LANRegisterProtocol - register tcpip as an NDIS protocol driver.
 *
 * Initializes the adapter list and its lock, builds the protocol
 * characteristics (name plus callback table) and hands them to
 * NdisRegisterProtocol.
 *
 * Name  protocol name to register
 *
 * Returns STATUS_SUCCESS and sets ProtocolRegistered on success; otherwise
 * the NDIS status converted to NTSTATUS.
 */
NTSTATUS LANRegisterProtocol(PNDIS_STRING Name)
{
    NDIS_PROTOCOL_CHARACTERISTICS Chars;
    NDIS_STATUS RegisterStatus;

    InitializeListHead(&AdapterListHead);
    KeInitializeSpinLock(&AdapterListLock);

    RtlZeroMemory(&Chars, sizeof(NDIS_PROTOCOL_CHARACTERISTICS));

    /* Version and name */
    Chars.MajorNdisVersion = NDIS_VERSION_MAJOR;
    Chars.MinorNdisVersion = NDIS_VERSION_MINOR;
    Chars.Name.Buffer = Name->Buffer;
    Chars.Name.Length = Name->Length;
    Chars.Name.MaximumLength = Name->MaximumLength;

    /* Receive path; ProtocolReceive is the key entry point */
    Chars.ReceiveHandler = ProtocolReceive;
    Chars.ReceiveCompleteHandler = ProtocolReceiveComplete;
    Chars.TransferDataCompleteHandler = ProtocolTransferDataComplete;

    /* Completion callbacks */
    Chars.OpenAdapterCompleteHandler = ProtocolOpenAdapterComplete;
    Chars.CloseAdapterCompleteHandler = ProtocolCloseAdapterComplete;
    Chars.SendCompleteHandler = ProtocolSendComplete;
    Chars.RequestCompleteHandler = ProtocolRequestComplete;
    Chars.ResetCompleteHandler = ProtocolResetComplete;

    /* Status notifications */
    Chars.StatusHandler = ProtocolStatus;
    Chars.StatusCompleteHandler = ProtocolStatusComplete;

    /* Binding, PnP and unload */
    Chars.BindAdapterHandler = ProtocolBindAdapter;
    Chars.UnbindAdapterHandler = ProtocolUnbindAdapter;
    Chars.PnPEventHandler = ProtocolPnPEvent;
    Chars.UnloadHandler = LANUnregisterProtocol;

    NdisRegisterProtocol(&RegisterStatus, &NdisProtocolHandle, &Chars,
                         sizeof(NDIS_PROTOCOL_CHARACTERISTICS));
    if (RegisterStatus == NDIS_STATUS_SUCCESS)
    {
        ProtocolRegistered = TRUE;
        return STATUS_SUCCESS;
    }
    return (NTSTATUS)RegisterStatus;
}
当注册为协议驱动后,该协议驱动会打开所有现有适配器,进行绑定。一旦某个网卡收到了数据,触发中断,小端口驱动就会调用上层各绑定协议注册的接收函数,将收到的帧提交给它们。Tcpip协议驱动注册的接收函数是ProtocolReceive,我们看它是如何接收处理的。
/*
 * ProtocolReceive - receive handler registered by tcpip with NDIS.
 *
 * Called with a frame header and a lookahead portion of the payload. Only
 * Ethernet (802.3) frames carrying IPv4 or ARP are accepted. A packet large
 * enough for the whole payload is allocated; the payload is either copied
 * from the lookahead buffer (when it already holds everything) or requested
 * from the miniport with NdisTransferData.
 *
 * BindingContext      protocol-defined binding context (a LAN_ADAPTER)
 * MacReceiveContext   opaque context passed back to NdisTransferData
 * HeaderBuffer        frame header
 * HeaderBufferSize    size of the frame header in bytes
 * LookaheadBuffer     leading part of the payload
 * LookaheadBufferSize size of the lookahead data in bytes
 * PacketSize          total payload size in bytes
 *
 * Returns NDIS_STATUS_NOT_ACCEPTED when the frame is rejected or no packet
 * could be allocated, NDIS_STATUS_SUCCESS otherwise.
 */
NDIS_STATUS NTAPI ProtocolReceive(
    NDIS_HANDLE BindingContext,
    NDIS_HANDLE MacReceiveContext,
    PVOID HeaderBuffer,
    UINT HeaderBufferSize,
    PVOID LookaheadBuffer,
    UINT LookaheadBufferSize,
    UINT PacketSize)
{
    PLAN_ADAPTER Adapter = (PLAN_ADAPTER)BindingContext;
    PETH_HEADER EHeader = (PETH_HEADER)HeaderBuffer;
    USHORT EType;
    UINT PacketType;
    UINT BytesTransferred;
    UINT temp;
    PCHAR BufferData;
    PNDIS_PACKET NdisPacket;
    NDIS_STATUS NdisStatus;

    if (Adapter->State != LAN_STATE_STARTED)
        return NDIS_STATUS_NOT_ACCEPTED;
    if (HeaderBufferSize < Adapter->HeaderSize)
        return NDIS_STATUS_NOT_ACCEPTED;

    /* Only Ethernet adapters are supported for now. */
    if (Adapter->Media != NdisMedium802_3)
        return NDIS_STATUS_NOT_ACCEPTED;

    /* Load the EtherType from the frame header before testing it. */
    EType = EHeader->EType;
    if ((EType != ETYPE_IPv4) && (EType != ETYPE_ARP))
        return NDIS_STATUS_NOT_ACCEPTED; /* no other payload types supported */
    PacketType = EType;

    /*
     * Allocate a packet descriptor that can hold the whole payload. A packet
     * descriptor may chain several buffer descriptors that together describe
     * one logically contiguous buffer.
     */
    NdisStatus = AllocatePacketWithBuffer( &NdisPacket, NULL, PacketSize );
    if (NdisStatus != NDIS_STATUS_SUCCESS)
        return NDIS_STATUS_NOT_ACCEPTED;

    PC(NdisPacket)->PacketType = PacketType;
    TransferDataCalled++;

    if (LookaheadBufferSize == PacketSize)
    {
        /* The lookahead buffer already holds the complete payload. */
        GetDataPtr( NdisPacket, 0, &BufferData, &temp );
        NdisCopyLookaheadData(BufferData, LookaheadBuffer, LookaheadBufferSize,
                              Adapter->MacOptions);
    }
    else
    {
        /*
         * Only part of the payload was indicated: hand the packet down as a
         * container and ask the miniport to transfer the full payload.
         */
        NdisTransferData(&NdisStatus, Adapter->NdisHandle, MacReceiveContext, 0,
                         PacketSize,
                         NdisPacket,
                         &BytesTransferred);
    }

    /* Unless the transfer is still pending, run the completion routine now. */
    if (NdisStatus != NDIS_STATUS_PENDING)
        ProtocolTransferDataComplete(BindingContext, NdisPacket, NdisStatus, PacketSize);
    return NDIS_STATUS_SUCCESS;
}
当本次接收操作完成后,也即接收到一个完整的包后,就调用ProtocolTransferDataComplete进行处理,我们看具体是如何处理接收到的包的。(此处的包为IP或ARP报文,我们看它是如何处理的)
/*
 * ProtocolTransferDataComplete - called once a payload transfer has finished.
 *
 * Counts the call and, when the transfer succeeded, forwards the packet to
 * LanSubmitReceiveWork. A failed transfer is ignored.
 *
 * BindingContext    protocol-defined binding context
 * Packet            packet that received the payload
 * Status            result of the transfer
 * BytesTransferred  number of bytes placed in the packet
 */
VOID NTAPI ProtocolTransferDataComplete(
    NDIS_HANDLE BindingContext,
    PNDIS_PACKET Packet,
    NDIS_STATUS Status,
    UINT BytesTransferred)
{
    TransferDataCompleteCalled++;

    if (Status == NDIS_STATUS_SUCCESS)
    {
        /* The real work happens in the submit routine. */
        LanSubmitReceiveWork(BindingContext, Packet, Status, BytesTransferred);
    }
}
/*
 * LanSubmitReceiveWork - queue a received packet for processing by
 * LanReceiveWorker.
 *
 * BindingContext    protocol-defined binding context (a LAN_ADAPTER)
 * Packet            received IP/ARP payload
 * Status            unused here
 * BytesTransferred  number of payload bytes in Packet
 *
 * The work item is allocated with WQ_CONTEXT_TAG because LanReceiveWorker
 * frees it with that tag. If the allocation fails, the packet is released
 * here, since no worker will run to release it.
 */
VOID LanSubmitReceiveWork(
    NDIS_HANDLE BindingContext,
    PNDIS_PACKET Packet,
    NDIS_STATUS Status,
    UINT BytesTransferred)
{
    PLAN_ADAPTER Adapter = (PLAN_ADAPTER)BindingContext;
    PLAN_WQ_ITEM WQItem;

    WQItem = ExAllocatePoolWithTag(NonPagedPool, sizeof(LAN_WQ_ITEM), WQ_CONTEXT_TAG);
    if (WQItem == NULL)
    {
        /* Out of memory: drop the packet instead of dereferencing NULL. */
        FreeNdisPacket(Packet);
        return;
    }

    WQItem->Packet = Packet;
    WQItem->Adapter = Adapter;
    WQItem->BytesTransferred = BytesTransferred;

    /* Create a work item that runs LanReceiveWorker on this packet. */
    ChewCreate( LanReceiveWorker, WQItem );
}
我们看到,收到一个报文后,以工作项的形式进行处理,最后进入LanReceiveWorker这个函数进行接收处理
/*
 * LanReceiveWorker - work-item routine that hands one received payload to
 * the IP or ARP layer.
 *
 * Context  the LAN_WQ_ITEM describing the packet; it is freed in this routine.
 */
VOID LanReceiveWorker( PVOID Context )
{
UINT PacketType;
PLAN_WQ_ITEM WorkItem = (PLAN_WQ_ITEM)Context;
PNDIS_PACKET Packet;
PLAN_ADAPTER Adapter;
UINT BytesTransferred;
PNDIS_BUFFER NdisBuffer;
IP_PACKET IPPacket;
//copy everything out of the work item before it is freed
Packet = WorkItem->Packet;
Adapter = WorkItem->Adapter;
BytesTransferred = WorkItem->BytesTransferred;
ExFreePoolWithTag(WorkItem, WQ_CONTEXT_TAG);
IPInitializePacket(&IPPacket, 0);
IPPacket.NdisPacket = Packet;
NdisGetFirstBufferFromPacket(Packet,&NdisBuffer,&IPPacket.Header,
&IPPacket.ContigSize,&IPPacket.TotalSize);
//the sizes reported above are overwritten with the transferred byte count
IPPacket.ContigSize = IPPacket.TotalSize = BytesTransferred;
PacketType = PC(IPPacket.NdisPacket)->PacketType;
IPPacket.Position = 0;
switch (PacketType)
{
case ETYPE_IPv4:
case ETYPE_IPv6:
IPReceive(Adapter->Context, &IPPacket);//hand the packet to the IP layer for parsing
break;
case ETYPE_ARP:
ARPReceive(Adapter->Context, &IPPacket);//hand the packet to the ARP layer for parsing
//no break: falls through to default, which calls IPPacket.Free.
//NOTE(review): looks intentional, since IPReceive calls Free itself while ARPReceive presumably does not; confirm
default:
IPPacket.Free(&IPPacket);
break;
}
FreeNdisPacket( Packet );
}
ARP的就不看了,看IP报文是如何在IP层接收的
/*
 * IPReceive - entry point of the IP layer for a received packet.
 *
 * Reads the version nibble of the header, tags the packet with the matching
 * address type and passes IPv4 packets to IPv4Receive. IPv6 packets are only
 * tagged; other versions are ignored. The packet's Free routine is always
 * called before returning.
 *
 * IF        interface the packet arrived on
 * IPPacket  received packet
 */
VOID IPReceive( PIP_INTERFACE IF, PIP_PACKET IPPacket )
{
    PIPv4_HEADER Hdr = (PIPv4_HEADER)IPPacket->Header;
    UINT IpVersion = (Hdr->VerIHL >> 4);

    if (IpVersion == 4)
    {
        IPPacket->Type = IP_ADDRESS_V4;
        IPv4Receive(IF, IPPacket);
    }
    else if (IpVersion == 6)
    {
        IPPacket->Type = IP_ADDRESS_V6;
    }

    IPPacket->Free(IPPacket);
}
/*
 * IPv4Receive - parse the IPv4 header of a received packet.
 *
 * Derives the header size from the IHL field, drops the packet when the
 * header is larger than IPv4_MAX_HEADER_SIZE or its checksum is wrong, then
 * records total size, source and destination address and the payload
 * position, and passes the packet to ProcessFragment.
 *
 * IF        interface the packet arrived on
 * IPPacket  received packet; updated in place
 */
VOID IPv4Receive(PIP_INTERFACE IF, PIP_PACKET IPPacket)
{
    PIPv4_HEADER Hdr = (PIPv4_HEADER)IPPacket->Header;

    /* IHL counts 32-bit words, so multiply by four. */
    IPPacket->HeaderSize = (Hdr->VerIHL & 0x0F) << 2;

    /* Malformed header: drop. */
    if (IPPacket->HeaderSize > IPv4_MAX_HEADER_SIZE)
    {
        return;
    }

    /* Header checksum failed: drop. */
    if (!IPv4CorrectChecksum(IPPacket->Header, IPPacket->HeaderSize))
    {
        return;
    }

    IPPacket->TotalSize = WN2H(Hdr->TotalLength);
    AddrInitIPv4(&IPPacket->SrcAddr, Hdr->SrcAddr);
    AddrInitIPv4(&IPPacket->DstAddr, Hdr->DstAddr);

    /* The payload starts right behind the header. */
    IPPacket->Position += IPPacket->HeaderSize;
    IPPacket->Data = (PVOID)((ULONG_PTR)IPPacket->Header + IPPacket->HeaderSize);

    /* The packet may be a fragment; let the reassembly code handle it. */
    ProcessFragment(IF, IPPacket);
}
当ProcessFragment拼接成一个完整的数据报后,内部就会调用IPDispatchProtocol函数,将IP报文上交给上层相应的协议去接收处理(上层的协议可能是tcp、udp、icmp、igmp等协议)
/*
 * IPDispatchProtocol - hand a complete IP datagram to its upper-layer
 * protocol handler.
 *
 * Only IPv4 packets are dispatched; any other type returns immediately.
 * The neighbor timeout for the source address is reset, then the handler
 * stored in ProtocolTable for the header's protocol number is called, if
 * that number lies inside the table.
 *
 * Interface  interface the datagram arrived on
 * IPPacket   complete IP datagram
 */
VOID IPDispatchProtocol(
    PIP_INTERFACE Interface,
    PIP_PACKET IPPacket)
{
    PIPv4_HEADER Hdr;
    UINT Protocol;
    IP_ADDRESS SrcAddress;

    if (IPPacket->Type != IP_ADDRESS_V4)
    {
        /* IPv6 and unknown address types are not dispatched. */
        return;
    }

    Hdr = (PIPv4_HEADER)(IPPacket->Header);
    Protocol = Hdr->Protocol;
    AddrInitIPv4(&SrcAddress, Hdr->SrcAddr);

    NBResetNeighborTimeout(&SrcAddress);

    if (Protocol < IP_PROTOCOL_TABLE_SIZE)
    {
        /* Key step: pass the datagram up to the matching protocol. */
        (*ProtocolTable[Protocol])(Interface, IPPacket);
    }
}
Tcp协议的接收处理函数是TcpReceive,Udp协议的接收处理函数是UDPReceive,我们看看是如何接收、解析udp报文的。
/*
 * UDPReceive - UDP-layer handler for a received IP datagram.
 *
 * Validates the UDP length and checksum, then delivers the payload to every
 * address file that matches the destination address and port.
 *
 * The length field is validated BEFORE the checksum is computed, because the
 * checksum routine is given that length and must not be asked to read past
 * the data actually present in the packet. Datagrams with a bad length or a
 * bad checksum are silently dropped.
 *
 * Interface  interface the datagram arrived on
 * IPPacket   datagram whose Data points at the UDP header
 */
VOID UDPReceive(PIP_INTERFACE Interface, PIP_PACKET IPPacket)
{
    AF_SEARCH SearchContext;
    PIPv4_HEADER IPv4Header;
    PADDRESS_FILE AddrFile;
    PUDP_HEADER UDPHeader;
    PIP_ADDRESS DstAddress, SrcAddress;
    UINT DataSize, i;

    switch (IPPacket->Type) {
    case IP_ADDRESS_V4:
        IPv4Header = IPPacket->Header;
        DstAddress = &IPPacket->DstAddr;
        SrcAddress = &IPPacket->SrcAddr;
        break;
    case IP_ADDRESS_V6:
        return;
    default:
        return;
    }

    UDPHeader = (PUDP_HEADER)IPPacket->Data;

    /* i = total UDP length (header + payload) as claimed by the sender. */
    i = WH2N(UDPHeader->Length);
    if ((i < sizeof(UDP_HEADER)) || (i > IPPacket->TotalSize - IPPacket->Position))
        return; /* malformed datagram: drop */

    /* The length is now known to be in bounds, so the checksum is safe to run. */
    i = UDPv4ChecksumCalculate(IPv4Header, (PUCHAR)UDPHeader, WH2N(UDPHeader->Length));
    if (i != DH2N(0x0000FFFF) && UDPHeader->Checksum != 0)
        return; /* checksum mismatch: drop */

    /* Payload length and position */
    DataSize = WH2N(UDPHeader->Length) - sizeof(UDP_HEADER);
    IPPacket->Data = (PVOID)((ULONG_PTR)IPPacket->Data + sizeof(UDP_HEADER));

    AddrFile = AddrSearchFirst(DstAddress, UDPHeader->DestPort, IPPROTO_UDP, &SearchContext);
    if (AddrFile)
    {
        do {
            /* Deliver to this matching address file, then look for the next. */
            DGDeliverData(AddrFile,
                          SrcAddress, DstAddress,
                          UDPHeader->SourcePort, UDPHeader->DestPort,
                          IPPacket, DataSize);
        } while ((AddrFile = AddrSearchNext(&SearchContext)) != NULL);
    }
}
如上,udp层是怎么处理接收到的报文的呢?它会检查校验和与长度,不正确的话就简单丢包(因此,udp协议不可靠)。然后,将这个udp报文投递给所有符合的socket(一个DstAddr:DstPort可能对应多个socket)
DGDeliverData函数暂时就不看了。
总结一下:每当网卡收到一个包后的处理流程为:isr->dpc->工作项->各协议层的接收处理函数
下面我们看IP报文的发送过程:
/*
 * IPSendDatagram - send a complete IP datagram to a neighbor.
 *
 * IPPacket  complete IP datagram
 * NCE       neighbor cache entry to send to
 * Complete  completion routine
 * Context   context passed along to SendFragments for the completion routine
 *
 * Returns whatever SendFragments returns. The MTU of the neighbor's
 * interface is used as the path MTU.
 */
NTSTATUS IPSendDatagram(PIP_PACKET IPPacket,
                        PNEIGHBOR_CACHE_ENTRY NCE,
                        PIP_TRANSMIT_COMPLETE Complete, PVOID Context)
{
    NTSTATUS SendStatus;

    /* Fragmentation to fit the MTU is handled by SendFragments. */
    SendStatus = SendFragments(IPPacket, NCE, NCE->Interface->MTU, Complete, Context);
    return SendStatus;
}
/*
 * SendFragments - start sending an IP datagram as one or more fragments.
 *
 * Allocates a fragment context (IFC) and a fragment packet no larger than
 * the path MTU, copies the IP header into it, prepares the first fragment
 * and queues it for the neighbor.
 *
 * IPPacket  complete IP datagram
 * NCE       neighbor cache entry to send to
 * PathMTU   maximum fragment size
 * Complete  completion routine stored in the fragment context
 * Context   context stored in the fragment context
 *
 * Returns STATUS_INSUFFICIENT_RESOURCES if the fragment context cannot be
 * allocated, the allocation status if the fragment packet cannot be
 * allocated, otherwise the result of IPSendFragment.
 */
NTSTATUS SendFragments(
    PIP_PACKET IPPacket,
    PNEIGHBOR_CACHE_ENTRY NCE,
    UINT PathMTU,
    PIP_TRANSMIT_COMPLETE Complete,
    PVOID Context)
{
    PIPFRAGMENT_CONTEXT IFC;
    NDIS_STATUS NdisStatus;
    PVOID Data;
    UINT BufferSize = PathMTU, InSize;
    PCHAR InData;

    /* A fragment is never larger than the MTU, nor than the datagram. */
    GetDataPtr( IPPacket->NdisPacket, 0, &InData, &InSize );
    if( InSize < BufferSize ) BufferSize = InSize;

    /* The IFC is the send context of one fragment packet. */
    IFC = ExAllocatePoolWithTag(NonPagedPool, sizeof(IPFRAGMENT_CONTEXT), IFC_TAG);
    if (IFC == NULL)
        return STATUS_INSUFFICIENT_RESOURCES;

    /* IFC->NdisPacket is the fragment packet itself. */
    NdisStatus = AllocatePacketWithBuffer( &IFC->NdisPacket, NULL, BufferSize );
    if (NdisStatus != NDIS_STATUS_SUCCESS)
    {
        /* Do not leak the context when the packet cannot be allocated. */
        ExFreePoolWithTag(IFC, IFC_TAG);
        return NdisStatus;
    }

    GetDataPtr( IFC->NdisPacket, 0, (PCHAR *)&Data, &InSize );

    IFC->Header = ((PCHAR)Data);
    IFC->Datagram = IPPacket->NdisPacket;   /* datagram the fragment belongs to */
    IFC->DatagramData = ((PCHAR)IPPacket->Header) + IPPacket->HeaderSize;
    IFC->HeaderSize = IPPacket->HeaderSize;
    IFC->PathMTU = PathMTU;
    IFC->NCE = NCE;
    IFC->Position = 0;
    IFC->BytesLeft = IPPacket->TotalSize - IPPacket->HeaderSize;
    IFC->Data = (PVOID)((ULONG_PTR)IFC->Header + IPPacket->HeaderSize);
    IFC->Complete = Complete;
    IFC->Context = Context;

    /* Every fragment starts with a copy of the datagram's IP header. */
    RtlCopyMemory( IFC->Header, IPPacket->Header, IPPacket->HeaderSize );

    PrepareNextFragment(IFC);

    /* Send the fragment packet to the given neighbor. */
    NdisStatus = IPSendFragment(IFC->NdisPacket, NCE, IFC);
    return NdisStatus;
}
/*
 * IPSendFragment - queue one fragment packet on a neighbor's send queue.
 *
 * NdisPacket  fragment packet to send
 * NCE         neighbor cache entry to send to
 * IFC         fragment context, passed to IPSendComplete on completion
 *
 * NBQueuePacket reports success as a BOOLEAN, so its result is translated
 * into an NTSTATUS here: STATUS_SUCCESS when the packet was queued,
 * STATUS_UNSUCCESSFUL otherwise.
 */
NTSTATUS IPSendFragment(
    PNDIS_PACKET NdisPacket,
    PNEIGHBOR_CACHE_ENTRY NCE,
    PIPFRAGMENT_CONTEXT IFC)
{
    if (!NBQueuePacket(NCE, NdisPacket, IPSendComplete, IFC))
        return STATUS_UNSUCCESSFUL;

    return STATUS_SUCCESS;
}
接着看:
/*
 * NBQueuePacket - append a packet to a neighbor's send queue.
 *
 * NCE             neighbor cache entry whose queue receives the packet
 * NdisPacket      fragment packet to queue
 * PacketComplete  completion routine recorded with the packet
 * PacketContext   context recorded with the packet
 *
 * The queue is protected by the lock of the neighbor-cache hash bucket the
 * entry's address maps to. Unless the entry has NUD_INCOMPLETE set, the
 * queue is flushed immediately through NBSendPackets.
 *
 * Returns TRUE when the packet was queued, FALSE when the queue entry could
 * not be allocated.
 */
BOOLEAN NBQueuePacket(
    PNEIGHBOR_CACHE_ENTRY NCE,
    PNDIS_PACKET NdisPacket,
    PNEIGHBOR_PACKET_COMPLETE PacketComplete,
    PVOID PacketContext)
{
    KIRQL OldIrql;
    PNEIGHBOR_PACKET Packet;
    UINT HashValue;

    /* Queue entry that wraps the packet on the neighbor's send queue. */
    Packet = ExAllocatePoolWithTag( NonPagedPool, sizeof(NEIGHBOR_PACKET), NEIGHBOR_PACKET_TAG );
    if (Packet == NULL)
        return FALSE;

    /* Fold the address down to a hash-bucket index. */
    HashValue = *(PULONG)(&NCE->Address.Address);
    HashValue ^= HashValue >> 16;
    HashValue ^= HashValue >> 8;
    HashValue ^= HashValue >> 4;
    HashValue &= NB_HASHMASK;

    TcpipAcquireSpinLock(&NeighborCache[HashValue].Lock, &OldIrql);
    Packet->Complete = PacketComplete;
    Packet->Context = PacketContext;
    Packet->Packet = NdisPacket;
    InsertTailList( &NCE->PacketQueue, &Packet->Next );
    TcpipReleaseSpinLock(&NeighborCache[HashValue].Lock, OldIrql);

    /* Send right away unless NUD_INCOMPLETE is set on the entry. */
    if( !(NCE->State & NUD_INCOMPLETE) )
        NBSendPackets( NCE );

    return TRUE;
}
/*
 * NBSendPackets - drain a neighbor's send queue.
 *
 * Removes every queued packet (under the lock of the entry's hash bucket),
 * records NBCompleteSend and the queue entry in the packet's context area,
 * and passes the packet to the interface's Transmit routine together with
 * the neighbor's link address.
 *
 * NCE  neighbor cache entry whose queue is flushed
 */
VOID NBSendPackets( PNEIGHBOR_CACHE_ENTRY NCE )
{
    PLIST_ENTRY Entry;
    PNEIGHBOR_PACKET Queued;
    UINT HashValue;

    /* Same bucket hash as used when the packet was queued. */
    HashValue = *(PULONG)(&NCE->Address.Address);
    HashValue ^= HashValue >> 16;
    HashValue ^= HashValue >> 8;
    HashValue ^= HashValue >> 4;
    HashValue &= NB_HASHMASK;

    for (;;)
    {
        Entry = ExInterlockedRemoveHeadList(&NCE->PacketQueue,
                                            &NeighborCache[HashValue].Lock);
        if (Entry == NULL)
        {
            break; /* queue is empty */
        }

        Queued = CONTAINING_RECORD( Entry, NEIGHBOR_PACKET, Next );

        PC(Queued->Packet)->DLComplete = NBCompleteSend;
        PC(Queued->Packet)->Context = Queued;

        NCE->Interface->Transmit(
            NCE->Interface->Context,   /* target adapter */
            Queued->Packet,            /* fragment packet */
            0,
            NCE->LinkAddress,          /* link address of the neighbor */
            LAN_PROTO_IPv4 );
    }
}
实际上,Transmit最终调用了小端口驱动自身提供的发送函数,将包给网卡。至于小端口驱动是怎么发出去的?一般小端口驱动会检查网卡芯片内部的硬件发送缓冲区是否空闲,若是,就立即写入硬件发送缓冲区中,否则,小端口驱动内部也维护一个发送队列,将暂时不能发的包储存在那个队列中。
邻接点是什么?邻接点就是发往目标机器的中途路径上的一台机器,一般就是默认网关。如果本机装了多个网卡,tcpip就会自动根据主机的路由表选择路由,将帧通过合适的网卡发给合适的邻接点。
Tcpip.sys内部创建了好几个设备对象,应用程序可以直接打开那些设备收发报文,不过,并不是通过IRP_MJ_READ、IRP_MJ_WRITE来收发报文的,而是通过IRP_MJ_INTERNAL_DEVICE_CONTROL进行。但是这样很麻烦,不好控制,应用程序一般借助socket来收发报文(驱动型木马往往直接使用IRP_MJ_INTERNAL_DEVICE_CONTROL来收发报文悄悄进行网络通信)。
Windows中的socket机制不同于unix,Windows中,socket api并不是系统调用,它的实现机制分为用户空间和内核空间。用户空间便是ws2_32.dll,内核空间便是afd.sys这个‘通用socket驱动’。为什么说是通用的呢?因为socket分为好几种socket:tcpip、ipx、netbios、AppleTalk等等。Afd驱动下层可以搭配任意协议驱动,只要那个协议驱动对afd提供tdi接口即可。在Windows中,各种协议驱动又叫服务提供者,afd驱动又叫服务使用者。下面我们看afd的DriverEntry。
/*
 * DriverEntry - initialization entry point of the afd driver.
 *
 * Routes create, close, cleanup, read, write and device-control requests to
 * AfdDispatch, creates the \Device\Afd device object and initializes its
 * device extension.
 *
 * DriverObject  driver object whose dispatch table is filled in
 * RegistryPath  unused here
 *
 * Returns the status of IoCreateDevice. On failure the device extension is
 * not touched, because no device object exists.
 */
NTSTATUS DriverEntry(PDRIVER_OBJECT DriverObject, PUNICODE_STRING RegistryPath)
{
    PDEVICE_OBJECT DeviceObject;
    UNICODE_STRING wstrDeviceName = RTL_CONSTANT_STRING(L"\\Device\\Afd");
    PAFD_DEVICE_EXTENSION DeviceExt;
    NTSTATUS Status;

    /* Every handled major function goes through AfdDispatch. */
    DriverObject->MajorFunction[IRP_MJ_CLOSE] = AfdDispatch;
    DriverObject->MajorFunction[IRP_MJ_CREATE] = AfdDispatch;
    DriverObject->MajorFunction[IRP_MJ_CLEANUP] = AfdDispatch;
    DriverObject->MajorFunction[IRP_MJ_WRITE] = AfdDispatch;
    DriverObject->MajorFunction[IRP_MJ_READ] = AfdDispatch;
    DriverObject->MajorFunction[IRP_MJ_DEVICE_CONTROL] = AfdDispatch;
    DriverObject->DriverUnload = AfdUnload;

    /* Create the afd socket-driver device object. */
    Status = IoCreateDevice( DriverObject, sizeof(AFD_DEVICE_EXTENSION), &wstrDeviceName,
                             FILE_DEVICE_NAMED_PIPE, 0, FALSE, &DeviceObject );
    if (!NT_SUCCESS(Status))
        return (Status);

    DeviceExt = DeviceObject->DeviceExtension;
    KeInitializeSpinLock( &DeviceExt->Lock );
    InitializeListHead( &DeviceExt->Polls );

    return (Status);
}
Socket api内部转换成socket irp发给afd设备,看看afd驱动是如何处理各种socket irp的
/*
 * AfdDispatch - single dispatch routine of the afd driver.
 *
 * Selects a handler from the IRP's major function and, for
 * IRP_MJ_DEVICE_CONTROL, from the I/O control code. Each implemented request
 * returns directly from its handler. Requests that are recognized but
 * unimplemented, unsupported control codes and unknown major functions fall
 * out of the switch and are completed here with the Status chosen below.
 *
 * DeviceObject  afd device object the request was sent to
 * Irp           request to dispatch
 *
 * Returns the handler's status, or the status the IRP was completed with.
 */
NTSTATUS AfdDispatch(PDEVICE_OBJECT DeviceObject, PIRP Irp)
{
PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
//default result for control codes that are recognized but not implemented
NTSTATUS Status = STATUS_NOT_IMPLEMENTED;
Irp->IoStatus.Information = 0;
switch(IrpSp->MajorFunction)
{
case IRP_MJ_CREATE:
return AfdCreateSocket(DeviceObject, Irp, IrpSp);
case IRP_MJ_CLOSE:
return AfdCloseSocket(DeviceObject, Irp, IrpSp);
case IRP_MJ_CLEANUP:
return AfdCleanupSocket(DeviceObject, Irp, IrpSp);
//plain write/read requests pass TRUE as the last argument, the SEND/RECV control codes below pass FALSE
case IRP_MJ_WRITE:
return AfdConnectedSocketWriteData( DeviceObject, Irp, IrpSp, TRUE );
case IRP_MJ_READ:
return AfdConnectedSocketReadData( DeviceObject, Irp, IrpSp, TRUE );
case IRP_MJ_DEVICE_CONTROL:
{
switch( IrpSp->Parameters.DeviceIoControl.IoControlCode ) {
case IOCTL_AFD_BIND:
return AfdBindSocket( DeviceObject, Irp, IrpSp );
case IOCTL_AFD_CONNECT:
return AfdStreamSocketConnect( DeviceObject, Irp, IrpSp );
case IOCTL_AFD_START_LISTEN:
return AfdListenSocket( DeviceObject, Irp, IrpSp );
case IOCTL_AFD_RECV:
return AfdConnectedSocketReadData( DeviceObject, Irp, IrpSp,FALSE );
case IOCTL_AFD_SELECT:
return AfdSelect( DeviceObject, Irp, IrpSp );
case IOCTL_AFD_EVENT_SELECT:
return AfdEventSelect( DeviceObject, Irp, IrpSp );
case IOCTL_AFD_ENUM_NETWORK_EVENTS:
return AfdEnumEvents( DeviceObject, Irp, IrpSp );
case IOCTL_AFD_RECV_DATAGRAM:
return AfdPacketSocketReadData( DeviceObject, Irp, IrpSp );
case IOCTL_AFD_SEND:
return AfdConnectedSocketWriteData( DeviceObject, Irp, IrpSp,FALSE );
case IOCTL_AFD_SEND_DATAGRAM:
return AfdPacketSocketWriteData( DeviceObject, Irp, IrpSp );
case IOCTL_AFD_GET_INFO:
return AfdGetInfo( DeviceObject, Irp, IrpSp );
case IOCTL_AFD_SET_INFO:
return AfdSetInfo( DeviceObject, Irp, IrpSp );
case IOCTL_AFD_GET_CONTEXT_SIZE:
return AfdGetContextSize( DeviceObject, Irp, IrpSp );
case IOCTL_AFD_GET_CONTEXT:
return AfdGetContext( DeviceObject, Irp, IrpSp );
case IOCTL_AFD_SET_CONTEXT:
return AfdSetContext( DeviceObject, Irp, IrpSp );
case IOCTL_AFD_WAIT_FOR_LISTEN:
return AfdWaitForListen( DeviceObject, Irp, IrpSp );
case IOCTL_AFD_ACCEPT:
return AfdAccept( DeviceObject, Irp, IrpSp );
case IOCTL_AFD_DISCONNECT:
return AfdDisconnect( DeviceObject, Irp, IrpSp );
case IOCTL_AFD_GET_SOCK_NAME:
return AfdGetSockName( DeviceObject, Irp, IrpSp );
case IOCTL_AFD_GET_PEER_NAME:
return AfdGetPeerName( DeviceObject, Irp, IrpSp );
case IOCTL_AFD_GET_CONNECT_DATA:
return AfdGetConnectData(DeviceObject, Irp, IrpSp);
case IOCTL_AFD_SET_CONNECT_DATA:
return AfdSetConnectData(DeviceObject, Irp, IrpSp);
case IOCTL_AFD_SET_DISCONNECT_DATA:
return AfdSetDisconnectData(DeviceObject, Irp, IrpSp);
case IOCTL_AFD_GET_DISCONNECT_DATA:
return AfdGetDisconnectData(DeviceObject, Irp, IrpSp);
case IOCTL_AFD_SET_CONNECT_DATA_SIZE:
return AfdSetConnectDataSize(DeviceObject, Irp, IrpSp);
case IOCTL_AFD_SET_DISCONNECT_DATA_SIZE:
return AfdSetDisconnectDataSize(DeviceObject, Irp, IrpSp);
case IOCTL_AFD_SET_CONNECT_OPTIONS:
return AfdSetConnectOptions(DeviceObject, Irp, IrpSp);
case IOCTL_AFD_SET_DISCONNECT_OPTIONS:
return AfdSetDisconnectOptions(DeviceObject, Irp, IrpSp);
case IOCTL_AFD_GET_CONNECT_OPTIONS:
return AfdGetConnectOptions(DeviceObject, Irp, IrpSp);
case IOCTL_AFD_GET_DISCONNECT_OPTIONS:
return AfdGetDisconnectOptions(DeviceObject, Irp, IrpSp);
case IOCTL_AFD_SET_CONNECT_OPTIONS_SIZE:
return AfdSetConnectOptionsSize(DeviceObject, Irp, IrpSp);
case IOCTL_AFD_SET_DISCONNECT_OPTIONS_SIZE:
return AfdSetDisconnectOptionsSize(DeviceObject, Irp, IrpSp);
case IOCTL_AFD_GET_TDI_HANDLES:
return AfdGetTdiHandles(DeviceObject, Irp, IrpSp);
//the three codes below are only logged; Status keeps its initial STATUS_NOT_IMPLEMENTED
case IOCTL_AFD_DEFER_ACCEPT:
DbgPrint("IOCTL_AFD_DEFER_ACCEPT is UNIMPLEMENTED!\n");
break;
case IOCTL_AFD_GET_PENDING_CONNECT_DATA:
DbgPrint("IOCTL_AFD_GET_PENDING_CONNECT_DATA is UNIMPLEMENTED!\n");
break;
case IOCTL_AFD_VALIDATE_GROUP:
DbgPrint("IOCTL_AFD_VALIDATE_GROUP is UNIMPLEMENTED!\n");
break;
default:
Status = STATUS_NOT_SUPPORTED;
break;
}
break;
}
default:
{
Status = STATUS_NOT_IMPLEMENTED;
break;
}
}
//reached only by requests no handler took over: complete the IRP here
Irp->IoStatus.Status = Status;
IoCompleteRequest(Irp, IO_NO_INCREMENT);
return (Status);
}
实际上,应用程序可以直接打开这个套接字驱动设备对象进行通信,但是应用程序很少这样做,因为不方便。为此,微软为afd套接字驱动提供了用户空间匹配的模块ws2_32.dll,通过socket api 来间接打开afd设备与afd驱动进行交互。Socket api除了方便外,另一个好处便是兼容unix,可移植。我们看下ws2_32.dll的DllMain
/*
 * DllMain - entry point of the Winsock DLL.
 *
 * DLL_PROCESS_ATTACH  records the process heap and module handle, creates
 *                     the catalog and provider handle table, fills in the
 *                     upcall table, then deliberately continues into the
 *                     thread-attach code so the attaching thread also gets
 *                     its per-thread block.
 * DLL_THREAD_ATTACH   allocates a WINSOCK_THREAD_BLOCK for the thread and
 *                     stores it in the TEB.
 * DLL_PROCESS_DETACH  destroys the catalog and the provider handle table.
 * DLL_THREAD_DETACH   frees the thread's WINSOCK_THREAD_BLOCK, if any.
 *
 * Returns TRUE, or FALSE when the per-thread block cannot be allocated.
 */
BOOL
DllMain(HANDLE hInstDll,
        ULONG dwReason,
        LPVOID lpReserved)
{
    PWINSOCK_THREAD_BLOCK p;

    switch (dwReason)
    {
    case DLL_PROCESS_ATTACH:
        {
            GlobalHeap = GetProcessHeap();
            g_hInstDll = hInstDll;
            CreateCatalog();
            InitProviderHandleTable(); /* table of provider handles */
            UpcallTable.lpWPUCloseEvent = WPUCloseEvent;
            UpcallTable.lpWPUCloseSocketHandle = WPUCloseSocketHandle;
            UpcallTable.lpWPUCreateEvent = WPUCreateEvent;
            UpcallTable.lpWPUCreateSocketHandle = WPUCreateSocketHandle;
            UpcallTable.lpWPUFDIsSet = WPUFDIsSet;
            UpcallTable.lpWPUGetProviderPath = WPUGetProviderPath;
            UpcallTable.lpWPUModifyIFSHandle = WPUModifyIFSHandle;
            UpcallTable.lpWPUPostMessage = PostMessageW;
            UpcallTable.lpWPUQueryBlockingCallback = WPUQueryBlockingCallback;
            UpcallTable.lpWPUQuerySocketHandleContext = WPUQuerySocketHandleContext;
            UpcallTable.lpWPUQueueApc = WPUQueueApc;
            UpcallTable.lpWPUResetEvent = WPUResetEvent;
            UpcallTable.lpWPUSetEvent = WPUSetEvent;
            UpcallTable.lpWPUOpenCurrentThread = WPUOpenCurrentThread;
            UpcallTable.lpWPUCloseThread = WPUCloseThread;
        }
        /* fallthrough: the attaching thread needs a thread block too */
    case DLL_THREAD_ATTACH:
        {
            p = HeapAlloc(GlobalHeap, 0, sizeof(WINSOCK_THREAD_BLOCK));
            if (p == NULL)
            {
                /* Without the block the fields below cannot be set up. */
                return FALSE;
            }
            p->Hostent = NULL;
            p->LastErrorValue = NO_ERROR;   /* per-thread socket last error */
            p->Getservbyname = NULL;
            p->Getservbyport = NULL;
            NtCurrentTeb()->WinSockData = p; /* one socket info block per thread */
        }
        break;
    case DLL_PROCESS_DETACH:
        {
            DestroyCatalog();
            FreeProviderHandleTable();
        }
        break;
    case DLL_THREAD_DETACH:
        {
            p = NtCurrentTeb()->WinSockData;
            if (p)
                HeapFree(GlobalHeap, 0, p);
        }
        break;
    }
    return TRUE;
}
Socket的创建:
/*
 * socket - create a socket.
 *
 * af        address family
 * type      socket type (datagram or stream)
 * protocol  protocol
 *
 * Thin wrapper: forwards to WSASocketW with no protocol info, no group and
 * no flags, and returns its result.
 */
SOCKET
socket(IN INT af,
       IN INT type,
       IN INT protocol)
{
    SOCKET NewSocket;

    NewSocket = WSASocketW(af, type, protocol, NULL, 0, 0);
    return NewSocket;
}
/*
 * WSASocketW - create a socket through the matching service provider.
 *
 * af, type, protocol  requested address family, socket type and protocol
 * lpProtocolInfo      optional protocol description; when NULL one is built
 *                     from af/type/protocol
 * g                   socket group
 * dwFlags             creation flags
 *
 * Returns the socket handle produced by the provider's WSPSocket entry, or
 * INVALID_SOCKET with the last error set when Winsock is not initialized,
 * no provider matches, the provider cannot be loaded, or the provider fails
 * to create the socket.
 */
SOCKET
WSASocketW(IN INT af,IN INT type,IN INT protocol,
           IN LPWSAPROTOCOL_INFOW lpProtocolInfo,IN GROUP g,IN DWORD dwFlags)
{
    INT Status;
    SOCKET Socket;
    PCATALOG_ENTRY Provider;
    WSAPROTOCOL_INFOW ProtocolInfo;

    if (!WSAINITIALIZED)
    {
        WSASetLastError(WSANOTINITIALISED);
        return INVALID_SOCKET;
    }

    if (!lpProtocolInfo)
    {
        /* No description supplied: build one from the three basic values. */
        lpProtocolInfo = &ProtocolInfo;
        ZeroMemory(&ProtocolInfo, sizeof(WSAPROTOCOL_INFOW));
        ProtocolInfo.iAddressFamily = af;
        ProtocolInfo.iSocketType = type;
        ProtocolInfo.iProtocol = protocol;
    }

    /* Find the service provider that handles this kind of socket. */
    Provider = LocateProvider(lpProtocolInfo);
    if (!Provider)
    {
        WSASetLastError(WSAEAFNOSUPPORT);
        return INVALID_SOCKET;
    }

    /* Load the provider; its entry points are unusable if this fails. */
    /* NOTE(review): assumes LoadProvider returns NO_ERROR on success; confirm. */
    Status = LoadProvider(Provider, lpProtocolInfo);
    if (Status != NO_ERROR)
    {
        WSASetLastError(Status);
        return INVALID_SOCKET;
    }

    /* Call the provider's socket-creation entry (WSPSocket for tcpip). */
    Socket = Provider->ProcTable.lpWSPSocket(af,type,protocol,lpProtocolInfo,
                                             g,dwFlags,&Status);
    if (Socket == INVALID_SOCKET)
    {
        /* Publish the provider's error code to the caller. */
        WSASetLastError(Status);
    }

    return Socket;
}
/*
 * WSPSocket - provider-side socket creation (abridged excerpt; several
 * local declarations and the error path are elided in this listing).
 *
 * Resolves the transport device name for the requested family/type/protocol,
 * allocates and fills a SOCKET_INFORMATION record, builds an extended
 * attribute (EA) buffer holding an AFD_CREATE_PACKET, opens
 * \Device\Afd\Endpoint with NtCreateFile, and links the new record into the
 * global socket list.
 *
 * Returns the handle produced by NtCreateFile, cast to SOCKET.
 */
SOCKET
WSPSocket(int AddressFamily,int SocketType,int Protocol,
LPWSAPROTOCOL_INFOW lpProtocolInfo,GROUP g,
DWORD dwFlags,LPINT lpErrno)
{
PSOCKET_INFORMATION Socket = NULL;
PFILE_FULL_EA_INFORMATION EABuffer = NULL;
//match family, type and protocol to the lower-level protocol driver / transport device to use; the device name is returned in TransportName
Status = SockGetTdiName (&AddressFamily,&SocketType,&Protocol,g,dwFlags,
&TransportName,//OUT
&HelperDLLContext,// OUT
&HelperData,//OUT
&HelperEvents);//OUT
RtlInitUnicodeString(&DevName, L"\\Device\\Afd\\Endpoint");//endpoint device
Socket = HeapAlloc(GlobalHeap, 0, sizeof(*Socket));//socket record; added to the global list further down
RtlZeroMemory(Socket, sizeof(*Socket));
Socket->RefCount = 2;
Socket->Handle = -1;//invalid handle until the device has been opened
Socket->SharedData.Listening = FALSE;
Socket->SharedData.State = SocketOpen;
Socket->SharedData.AddressFamily = AddressFamily;
Socket->SharedData.SocketType = SocketType;
Socket->SharedData.Protocol = Protocol;
Socket->HelperContext = HelperDLLContext;
Socket->HelperData = HelperData;
Socket->HelperEvents = HelperEvents;
Socket->LocalAddress = &Socket->WSLocalAddress;
Socket->SharedData.SizeOfLocalAddress = HelperData->MaxWSAddressLength;
Socket->RemoteAddress = &Socket->WSRemoteAddress;
Socket->SharedData.SizeOfRemoteAddress = HelperData->MaxWSAddressLength;
Socket->SharedData.UseDelayedAcceptance = HelperData->UseDelayedAcceptance;
Socket->SharedData.CreateFlags = dwFlags;
Socket->SharedData.CatalogEntryId = lpProtocolInfo->dwCatalogEntryId;
Socket->SharedData.ServiceFlags1 = lpProtocolInfo->dwServiceFlags1;
Socket->SharedData.ProviderFlags = lpProtocolInfo->dwProviderFlags;
Socket->SharedData.GroupID = g;
Socket->SharedData.GroupType = 0;
Socket->SharedData.UseSAN = FALSE;
Socket->SharedData.NonBlocking = FALSE;//blocking mode by default
Socket->SanData = NULL;
//datagram and raw sockets are marked connectionless
if( Socket->SharedData.SocketType == SOCK_DGRAM ||
Socket->SharedData.SocketType == SOCK_RAW )
{
Socket->SharedData.ServiceFlags1 |= XP1_CONNECTIONLESS;
}
SizeOfPacket = TransportName.Length + sizeof(AFD_CREATE_PACKET) + sizeof(WCHAR);
SizeOfEA = sizeof(FILE_FULL_EA_INFORMATION) + AFD_PACKET_COMMAND_LENGTH + SizeOfPacket;
EABuffer = HeapAlloc(GlobalHeap, 0, SizeOfEA);//the EA value is an AFD_CREATE_PACKET structure
RtlZeroMemory(EABuffer, SizeOfEA);
EABuffer->NextEntryOffset = 0;
EABuffer->Flags = 0;
EABuffer->EaNameLength = AFD_PACKET_COMMAND_LENGTH;
RtlCopyMemory (EABuffer->EaName, AfdCommand, AFD_PACKET_COMMAND_LENGTH + 1);
EABuffer->EaValueLength = SizeOfPacket;
//the EA value starts right after the EA name and its terminator
AfdPacket = (PAFD_CREATE_PACKET)(EABuffer->EaName + EABuffer->EaNameLength + 1);
AfdPacket->SizeOfTransportName = TransportName.Length;
//record the name of the transport device this socket sits on
RtlCopyMemory (AfdPacket->TransportName,TransportName.Buffer,
TransportName.Length + sizeof(WCHAR));
AfdPacket->GroupID = g;
if ((Socket->SharedData.ServiceFlags1 & XP1_CONNECTIONLESS) != 0)
{
if ((SocketType != SOCK_DGRAM) && (SocketType != SOCK_RAW))
goto error;
AfdPacket->EndpointFlags |= AFD_ENDPOINT_CONNECTIONLESS;
}
if ((Socket->SharedData.ServiceFlags1 & XP1_MESSAGE_ORIENTED) != 0)
{
if (SocketType == SOCK_STREAM)
{
if ((Socket->SharedData.ServiceFlags1 & XP1_PSEUDO_STREAM) == 0)
goto error;
}
AfdPacket->EndpointFlags |= AFD_ENDPOINT_MESSAGE_ORIENTED;
}
if (SocketType == SOCK_RAW) AfdPacket->EndpointFlags |= AFD_ENDPOINT_RAW;
InitializeObjectAttributes (&Object,&DevName,OBJ_CASE_INSENSITIVE | OBJ_INHERIT,0,0);
//key step: open the afd device, which creates a socket file object; the socket handle is returned in Sock
Status = NtCreateFile(&Sock,GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE,&Object,
&IOSB,NULL,0,FILE_SHARE_READ | FILE_SHARE_WRITE,
FILE_OPEN_IF,0,EABuffer,SizeOfEA);
HeapFree(GlobalHeap, 0, EABuffer);
Socket->Handle = (SOCKET)Sock;//remember the handle
if (g != 0) …
//i.e. FCB->Send.Size: the socket's send buffer size (the original note gives 16384 bytes as the default)
GetSocketInformation (Socket,AFD_INFO_SEND_WINDOW_SIZE,
&Socket->SharedData.SizeOfSendBuffer,NULL);
//i.e. FCB->Recv.Size: the socket's receive buffer size (the original note gives 16384 bytes as the default)
GetSocketInformation (Socket,AFD_INFO_RECEIVE_WINDOW_SIZE,
&Socket->SharedData.SizeOfRecvBuffer,NULL);
EnterCriticalSection(&SocketListLock);
Socket->NextSocket = SocketListHead;
SocketListHead = Socket;//push the new socket onto the global list
LeaveCriticalSection(&SocketListLock);
CreateContext(Socket);
Upcalls.lpWPUModifyIFSHandle(1, (SOCKET)Sock, lpErrno);
return (SOCKET)Sock;//return the socket handle
error:…
}
实际上,上面的函数会调用NtCreateFile打开设备,创建一个套接字文件对象,然后返回该文件对象的句柄(即套接字句柄)给用户。NtCreateFile内部在IopParseDevice中会创建一个文件对象,然后,会打开目标afd设备,生成IRP_MJ_CREATE发给目标设备,最终进入AfdDispatch这个派遣例程中处理该irp,前面看到,具体处理这种IRP的是下面的函数
/*
 * AfdCreateSocket - handle IRP_MJ_CREATE on the afd device (abridged
 * excerpt; the failure branch is elided in this listing).
 *
 * Allocates an AFD_FCB for the new file object, initializes its lists and
 * mutex, copies the endpoint flags, group id and transport device name out
 * of the AFD_CREATE_PACKET carried in the extended attributes (if any), and
 * completes the IRP.
 *
 * DeviceObject  afd device object
 * Irp           create request
 * IrpSp         current stack location of Irp
 *
 * Returns the status the IRP was completed with.
 */
NTSTATUS
AfdCreateSocket(PDEVICE_OBJECT DeviceObject, PIRP Irp,PIO_STACK_LOCATION IrpSp)
{
PAFD_FCB FCB;
PFILE_OBJECT FileObject;
PAFD_DEVICE_EXTENSION DeviceExt;
PFILE_FULL_EA_INFORMATION EaInfo;
PAFD_CREATE_PACKET ConnectInfo = NULL;
ULONG EaLength;
PWCHAR EaInfoValue = NULL;
UINT Disposition, i;
NTSTATUS Status = STATUS_SUCCESS;
DeviceExt = DeviceObject->DeviceExtension;
FileObject = IrpSp->FileObject;
Disposition = (IrpSp->Parameters.Create.Options >> 24) & 0xff;
Irp->IoStatus.Information = 0;
//for this IRP_MJ_CREATE the SystemBuffer holds the extended attributes (EA)
EaInfo = Irp->AssociatedIrp.SystemBuffer;
if( EaInfo )
{
//the EA value (an AFD_CREATE_PACKET) follows the EA name and its terminator
ConnectInfo = (PAFD_CREATE_PACKET)(EaInfo->EaName + EaInfo->EaNameLength + 1);
EaInfoValue = (PWCHAR)(((PCHAR)ConnectInfo) + sizeof(AFD_CREATE_PACKET));
EaLength = sizeof(FILE_FULL_EA_INFORMATION) +EaInfo->EaNameLength +EaInfo->EaValueLength;
}
//allocate a socket FCB that records the state of this socket file object
FCB = ExAllocatePool(NonPagedPool, sizeof(AFD_FCB));
RtlZeroMemory( FCB, sizeof( *FCB ) );
FileObject->FsContext = FCB;//key step: the file object's FsContext points at the socket FCB
FCB->Flags = ConnectInfo ? ConnectInfo->EndpointFlags : 0;
FCB->GroupID = ConnectInfo ? ConnectInfo->GroupID : 0;
FCB->State = SOCKET_STATE_CREATED;
FCB->FileObject = FileObject;//back pointer to the file object
FCB->DeviceExt = DeviceExt;//extension of the owning afd device
FCB->AddressFile.Handle = INVALID_HANDLE_VALUE;//no local address bound yet
FCB->Connection.Handle = INVALID_HANDLE_VALUE;
KeInitializeMutex( &FCB->Mutex, 0 );
for( i = 0; i < 6; i++ )
InitializeListHead( &FCB->PendingIrpList[i] );//six pending-IRP queues (the original note says they serve asynchronous operation)
InitializeListHead( &FCB->DatagramList );//datagram list (the original note calls it the udp receive queue)
InitializeListHead( &FCB->PendingConnections );//pending-connections list (the original note calls it the tcp receive queue)
if( ConnectInfo ) {
//keep a private copy of the transport device name
FCB->TdiDeviceName.Length = ConnectInfo->SizeOfTransportName;
FCB->TdiDeviceName.MaximumLength = FCB->TdiDeviceName.Length;
FCB->TdiDeviceName.Buffer = ExAllocatePool( NonPagedPool, FCB->TdiDeviceName.Length );
RtlCopyMemory( FCB->TdiDeviceName.Buffer,ConnectInfo->TransportName,
FCB->TdiDeviceName.Length );
}
if( FCB->Flags & AFD_ENDPOINT_CONNECTIONLESS ) {
FCB->PollState |= AFD_EVENT_SEND;//socket state: ready to send
FCB->PollStatus[FD_WRITE_BIT] = STATUS_SUCCESS;
PollReeval( FCB->DeviceExt, FCB->FileObject );
}
if( !NT_SUCCESS(Status) ) 。。。
Irp->IoStatus.Status = Status;
IoCompleteRequest( Irp, IO_NETWORK_INCREMENT );
return Status;
}
如上,每当创建一个套接字的时候,就为其准备一个irp队列、udp收包队列、tcp收包队列,分配关联一个FCB记录其他方面的套接字信息。
Afd相关的概念有:套接字驱动、套接字设备、套接字文件、套接字FCB
套接字驱动:afd.sys
套接字设备:\Device\Afd\Endpoint
套接字文件对象:每打开一次套接字设备生成一个套接字文件对象
套接字FCB:每个套接字文件对象关联的FCB
套接字创建完毕后,还需要绑定IP地址与端口号。注意套接字是afd驱动中的概念和术语,传输层并没有这种说法,传输层中对应会创建一个地址对象,来表示afd中的socket。Afd中的socket绑定的就是传输层中的地址对象。传输层中有一个地址对象列表,维护记录着所有创建的地址对象。下面看一下套接字的绑定过程。“创绑地址文件”。(一个地址文件就代表一个地址对象)
/*
 * bind - bind a socket to a local address.
 *
 * s        socket handle
 * name     local address to bind to
 * namelen  length of name in bytes
 *
 * Looks up the service provider that owns the socket and forwards the call
 * to its WSPBind entry (WSPBind for tcpip).
 *
 * Returns the provider's result. Returns SOCKET_ERROR with the last error
 * set when Winsock is not initialized (WSANOTINITIALISED), when no provider
 * is found for the handle (WSAENOTSOCK), or when the provider fails.
 */
INT
bind(IN SOCKET s,
     IN CONST struct sockaddr *name,
     IN INT namelen)
{
    /* Start from NULL so a failed lookup is detectable below. */
    PCATALOG_ENTRY Provider = NULL;
    INT Status;
    INT Errno;

    if (!WSAINITIALIZED)
    {
        WSASetLastError(WSANOTINITIALISED);
        return SOCKET_ERROR;
    }

    /* Get the service provider used by this socket. */
    /* NOTE(review): assumes a failed lookup leaves Provider NULL; confirm. */
    ReferenceProviderByHandle((HANDLE)s, &Provider);
    if (Provider == NULL)
    {
        WSASetLastError(WSAENOTSOCK);
        return SOCKET_ERROR;
    }

    Status = Provider->ProcTable.lpWSPBind(s, name, namelen, &Errno);
    DereferenceProviderByPointer(Provider);

    if (Status == SOCKET_ERROR)
        WSASetLastError(Errno);

    return Status;
}
/*
 * WSPBind - provider-side implementation of bind.
 *
 * Handle               socket handle
 * SocketAddress        local address to bind to
 * SocketAddressLength  length of SocketAddress in bytes
 * lpErrno              optional; receives the error code on failure
 *
 * Builds an AFD_BIND_DATA request from the socket address, sends
 * IOCTL_AFD_BIND to the afd device and waits for it to finish. On success
 * the socket is marked bound, the returned address handle is stored, and
 * the helper is notified if it asked for bind notifications.
 *
 * The request buffer is sized for the larger of the caller's address and
 * the socket's SizeOfLocalAddress, because the IOCTL is issued with a length
 * based on SizeOfLocalAddress and must not read past the allocation.
 *
 * Returns SOCKET_ERROR with *lpErrno set for an unknown handle (WSAENOTSOCK),
 * a missing or too-short address (WSAEFAULT), or a failed helper
 * notification; every other path returns the value of MsafdReturnWithErrno.
 */
INT
WSPBind(SOCKET Handle,
        const struct sockaddr *SocketAddress,
        int SocketAddressLength,
        LPINT lpErrno)
{
    IO_STATUS_BLOCK IOSB;
    PAFD_BIND_DATA BindData;
    PSOCKET_INFORMATION Socket = NULL;
    NTSTATUS Status;
    SOCKADDR_INFO SocketInfo;
    HANDLE SockEvent;
    ULONG RequestLength;
    ULONG AllocLength;

    /* Find the socket record that belongs to the handle. */
    Socket = GetSocketStructure(Handle);
    if (Socket == NULL)
    {
        if (lpErrno) *lpErrno = WSAENOTSOCK;
        return SOCKET_ERROR;
    }

    /* The address must at least contain the family field. */
    if (SocketAddress == NULL ||
        SocketAddressLength < (int)sizeof(SocketAddress->sa_family))
    {
        if (lpErrno) *lpErrno = WSAEFAULT;
        return SOCKET_ERROR;
    }

    /* Length handed to the IOCTL, and an allocation that always covers it. */
    RequestLength = 0xA + Socket->SharedData.SizeOfLocalAddress;
    AllocLength = 0xA + (ULONG)SocketAddressLength;
    if (AllocLength < RequestLength)
        AllocLength = RequestLength;

    BindData = HeapAlloc(GlobalHeap, HEAP_ZERO_MEMORY, AllocLength);
    if (BindData == NULL)
        return MsafdReturnWithErrno ( STATUS_INSUFFICIENT_RESOURCES, lpErrno, 0, NULL );

    Status = NtCreateEvent(&SockEvent,GENERIC_READ | GENERIC_WRITE,NULL,1,FALSE);
    if (!NT_SUCCESS(Status))
    {
        HeapFree(GlobalHeap, 0, BindData);
        return MsafdReturnWithErrno ( Status, lpErrno, 0, NULL );
    }

    /* Convert the socket address into a single-entry transport address. */
    BindData->Address.TAAddressCount = 1;
    BindData->Address.Address[0].AddressLength =
        SocketAddressLength - sizeof(SocketAddress->sa_family);
    BindData->Address.Address[0].AddressType = SocketAddress->sa_family;
    RtlCopyMemory (BindData->Address.Address[0].Address, SocketAddress->sa_data,
                   SocketAddressLength - sizeof(SocketAddress->sa_family));

    Socket->HelperData->WSHGetSockaddrType ((PSOCKADDR)SocketAddress,
                                            SocketAddressLength,&SocketInfo);

    /* Choose the share type from the socket options and address kind. */
    if (Socket->SharedData.ExclusiveAddressUse)
        BindData->ShareType = AFD_SHARE_EXCLUSIVE;
    else if (SocketInfo.EndpointInfo == SockaddrEndpointInfoWildcard)
        BindData->ShareType = AFD_SHARE_WILDCARD;
    else if (Socket->SharedData.ReuseAddresses)
        BindData->ShareType = AFD_SHARE_REUSE;
    else
        BindData->ShareType = AFD_SHARE_UNIQUE;

    /* Send the bind request to the afd socket device. */
    Status = NtDeviceIoControlFile((HANDLE)Socket->Handle,SockEvent,NULL,NULL,&IOSB,
                                   IOCTL_AFD_BIND,BindData,
                                   RequestLength, BindData,
                                   RequestLength);
    if (Status == STATUS_PENDING)
    {
        WaitForSingleObject(SockEvent, INFINITE);
        Status = IOSB.Status;
    }

    NtClose( SockEvent );
    HeapFree(GlobalHeap, 0, BindData);

    if (Status != STATUS_SUCCESS)
        return MsafdReturnWithErrno ( Status, lpErrno, 0, NULL );

    Socket->SharedData.State = SocketBound;   /* binding is complete */
    Socket->TdiAddressHandle = (HANDLE)IOSB.Information;

    if (Socket->HelperEvents & WSH_NOTIFY_BIND)
    {
        Status = Socket->HelperData->WSHNotify(Socket->HelperContext,Socket->Handle,
                                               Socket->TdiAddressHandle,
                                               Socket->TdiConnectionHandle,
                                               WSH_NOTIFY_BIND);
        if (Status)
        {
            if (lpErrno) *lpErrno = Status;
            return SOCKET_ERROR;
        }
    }

    return MsafdReturnWithErrno ( Status, lpErrno, 0, NULL );
}
看看afd驱动是如何处理绑定请求的
/*
 * AfdBindSocket - AFD handler for the bind request IRP.
 *
 * Copies the requested local address into the socket FCB, builds the
 * "address from" connection info from it, and opens the transport-layer
 * address file through WarmSocketForBind. For connectionless endpoints a
 * datagram receive is posted to the transport right after the address file
 * has been opened. On success the FCB is marked SOCKET_STATE_BOUND and the
 * address file handle is returned as the IRP's information value.
 *
 * Any failure before the address file exists completes the IRP immediately,
 * so the datagram receive is never posted on a missing address file.
 */
NTSTATUS
AfdBindSocket(PDEVICE_OBJECT DeviceObject, PIRP Irp,PIO_STACK_LOCATION IrpSp)
{
    NTSTATUS Status = STATUS_SUCCESS;
    PFILE_OBJECT FileObject = IrpSp->FileObject;   /* file object the request was issued on */
    PAFD_FCB FCB = FileObject->FsContext;          /* socket FCB */
    PAFD_BIND_DATA BindReq;

    if (!SocketAcquireStateLock(FCB)) return LostSocket(Irp);

    if (!(BindReq = LockRequest(Irp, IrpSp)))
        return UnlockAndMaybeComplete(FCB, STATUS_NO_MEMORY, Irp, 0);

    /* Remember the local address of this socket. */
    FCB->LocalAddress = TaCopyTransportAddress(&BindReq->Address);
    if (!FCB->LocalAddress)
        return UnlockAndMaybeComplete(FCB, STATUS_NO_MEMORY, Irp, 0);

    /*
     * Per the original commentary: initially "connect" to ourselves so that
     * a send loops back to this socket.
     */
    Status = TdiBuildConnectionInfo(&FCB->AddressFrom, FCB->LocalAddress);

    /* Key step: create and bind an address file in the transport driver. */
    if (NT_SUCCESS(Status))
        Status = WarmSocketForBind(FCB);

    /* Without an address file nothing below can work; fail the bind here. */
    if (!NT_SUCCESS(Status))
        return UnlockAndMaybeComplete(FCB, Status, Irp, 0);

    /* Connectionless endpoint: post a receive to the transport right away. */
    if (FCB->Flags & AFD_ENDPOINT_CONNECTIONLESS)
    {
        Status = TdiReceiveDatagram(&FCB->ReceiveIrp.InFlightRequest,
                                    FCB->AddressFile.Object,
                                    0,
                                    FCB->Recv.Window,   /* datagram receive buffer */
                                    FCB->Recv.Size,
                                    FCB->AddressFrom,
                                    &FCB->ReceiveIrp.Iosb,
                                    PacketSocketRecvComplete,
                                    FCB);

        /* A pending receive is the expected outcome, not an error. */
        if (Status == STATUS_PENDING) Status = STATUS_SUCCESS;
    }

    if (NT_SUCCESS(Status))
        FCB->State = SOCKET_STATE_BOUND;   /* binding completed */

    return UnlockAndMaybeComplete(FCB, Status, Irp, (ULONG_PTR)FCB->AddressFile.Handle);
}
如上,上面最关键的操作便是在下层的传输层驱动中创建一个地址对象,然后让afd驱动中的套接字与传输层驱动中的这个地址对象进行绑定。具体是由下面的函数完成的。
/*
 * WarmSocketForBind - open the transport-layer address file for a socket.
 *
 * Opens an address file on the transport device named in the FCB, bound to
 * FCB->LocalAddress, and stores its handle and file object in
 * FCB->AddressFile. For connectionless endpoints it also queries the maximum
 * datagram length and allocates a receive buffer of that size.
 *
 * Returns the failing status of any step, or STATUS_NO_MEMORY if the receive
 * buffer cannot be allocated.
 */
NTSTATUS WarmSocketForBind( PAFD_FCB FCB )
{
    NTSTATUS Status;

    /* Create the address object in the transport layer and bind to it. */
    Status = TdiOpenAddressFile(&FCB->TdiDeviceName,        /* target transport device */
                                FCB->LocalAddress,          /* address to bind */
                                &FCB->AddressFile.Handle,   /* out: address file handle */
                                &FCB->AddressFile.Object);  /* out: address file object */
    if (!NT_SUCCESS(Status))
        return Status;

    if (FCB->Flags & AFD_ENDPOINT_CONNECTIONLESS)
    {
        /* Ask the transport for the largest datagram it supports. */
        Status = TdiQueryMaxDatagramLength(FCB->AddressFile.Object, &FCB->Recv.Size);
        if (!NT_SUCCESS(Status))
            return Status;

        /* Allocate the datagram receive buffer. */
        FCB->Recv.Window = ExAllocatePool(PagedPool, FCB->Recv.Size);
        if (!FCB->Recv.Window)
            Status = STATUS_NO_MEMORY;
    }

    return Status;
}
由于udp协议是面向报文的,是以报文为单位进行收发的,所以要接收完整的udp报文就必须分配足够大的接收缓冲区。
/*
 * TdiOpenAddressFile - open a transport address file bound to `Name`.
 *
 * Builds a FILE_FULL_EA_INFORMATION whose EA name is TdiTransportAddress and
 * whose value is a copy of the transport address, then opens the transport
 * device with that EA through TdiOpenDevice.
 *
 * Returns STATUS_INSUFFICIENT_RESOURCES if the EA buffer cannot be allocated,
 * otherwise the status of TdiOpenDevice.
 */
NTSTATUS TdiOpenAddressFile(
    PUNICODE_STRING DeviceName,     /* name of the transport (TDI) device */
    PTRANSPORT_ADDRESS Name,        /* address to bind */
    PHANDLE AddressHandle,          /* out: address file handle */
    PFILE_OBJECT *AddressObject)    /* out: address file object */
{
    PFILE_FULL_EA_INFORMATION EaInfo;
    NTSTATUS Status;
    ULONG EaLength;
    PTRANSPORT_ADDRESS Address;

    /* EA header + EA name + one byte after the name + the address value. */
    EaLength = sizeof(FILE_FULL_EA_INFORMATION) + TDI_TRANSPORT_ADDRESS_LENGTH +
               TaLengthOfTransportAddress(Name) + 1;

    EaInfo = (PFILE_FULL_EA_INFORMATION)ExAllocatePool(NonPagedPool, EaLength);
    if (!EaInfo)
        return STATUS_INSUFFICIENT_RESOURCES;

    RtlZeroMemory(EaInfo, EaLength);
    EaInfo->EaNameLength = TDI_TRANSPORT_ADDRESS_LENGTH;
    RtlCopyMemory(EaInfo->EaName, TdiTransportAddress, TDI_TRANSPORT_ADDRESS_LENGTH);
    EaInfo->EaValueLength = sizeof(TA_IP_ADDRESS);

    /* The value starts one byte past the end of the EA name. */
    Address = (PTRANSPORT_ADDRESS)(EaInfo->EaName + TDI_TRANSPORT_ADDRESS_LENGTH + 1);
    TaCopyTransportAddressInPlace(Address, Name);

    /* Key step: open the transport device, which creates the address file. */
    Status = TdiOpenDevice(DeviceName, EaLength, EaInfo, AddressHandle, AddressObject);

    ExFreePool(EaInfo);
    return Status;
}
/*
 * TdiOpenDevice - open a transport device and reference the resulting file.
 *
 * Opens `DeviceName` with the supplied extended attributes as a kernel
 * handle, then takes a reference on the file object behind that handle.
 *
 * On success *Handle and *Object describe the opened file. If the file was
 * created but referencing it fails, the handle is closed again and *Handle is
 * set to INVALID_HANDLE_VALUE so that it does not leak.
 */
NTSTATUS TdiOpenDevice(
    PUNICODE_STRING DeviceName,
    ULONG EaLength,
    PFILE_FULL_EA_INFORMATION EaInfo,
    PHANDLE Handle,          /* out: file handle */
    PFILE_OBJECT *Object)    /* out: file object */
{
    OBJECT_ATTRIBUTES Attr;
    IO_STATUS_BLOCK Iosb;
    NTSTATUS Status;

    InitializeObjectAttributes(&Attr, DeviceName,
                               OBJ_CASE_INSENSITIVE | OBJ_KERNEL_HANDLE,
                               NULL, NULL);

    /* Key step: open the transport device, producing the file object. */
    Status = ZwCreateFile(Handle, GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE,
                          &Attr, &Iosb, 0, FILE_ATTRIBUTE_NORMAL, 0, FILE_OPEN_IF, 0,
                          EaInfo, EaLength);
    if (!NT_SUCCESS(Status))
        return Status;

    Status = ObReferenceObjectByHandle(*Handle,
                                       GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE,
                                       IoFileObjectType, KernelMode,
                                       (PVOID*)Object, NULL);
    if (!NT_SUCCESS(Status))
    {
        /* Do not hand back a handle the caller cannot use. */
        ZwClose(*Handle);
        *Handle = INVALID_HANDLE_VALUE;
    }

    return Status;
}
ZwCreateFile将给传输层的设备发送一个IRP_MJ_CREATE,程序流从afd.sys驱动进入tcpip.sys传输层驱动中的irp派遣函数中。看看下面传输层是如何处理这种irp请求的(创建、绑定地址对象请求)
/*
 * TiCreateFileObject - create handler of the transport driver (excerpt: the
 * local declarations and the final else branch are elided in this listing).
 *
 * Allocates a TRANSPORT_CONTEXT, stores it in FileObject->FsContext, then
 * inspects the extended-attribute buffer to decide what is being opened:
 *   - EA name equals TdiTransportAddress  -> address file (FileOpenAddress)
 *   - EA name equals TdiConnectionContext -> connection endpoint
 *                                            (FileOpenConnection, TCP device only)
 * FsContext2 records which kind of object Context->Handle holds.
 *
 * NOTE(review): the allocation result is used without a NULL check, and
 * Context is not freed in the visible code when FileOpenAddress or
 * FileOpenConnection fails - confirm against the full source.
 */
NTSTATUS TiCreateFileObject(PDEVICE_OBJECT DeviceObject,PIRP Irp)
{
EaInfo = Irp->AssociatedIrp.SystemBuffer;
Context = ExAllocatePoolWithTag(NonPagedPool, sizeof(TRANSPORT_CONTEXT),TRANS_CONTEXT_TAG);
Context->CancelIrps = FALSE;
IrpSp = IoGetCurrentIrpStackLocation(Irp);
IrpSp->FileObject->FsContext = Context;
Request.RequestContext = Irp;
// Case 1: the EA name is TdiTransportAddress - a request to create and bind an address file
if (EaInfo && (EaInfo->EaNameLength == TDI_TRANSPORT_ADDRESS_LENGTH) &&
(RtlCompareMemory (&EaInfo->EaName, TdiTransportAddress,
TDI_TRANSPORT_ADDRESS_LENGTH) == TDI_TRANSPORT_ADDRESS_LENGTH))
{
// The address value sits after the EA name plus one extra byte
Address = (PTA_IP_ADDRESS)(EaInfo->EaName + EaInfo->EaNameLength + 1);
// Derive the protocol from the device object the create was sent to
if (DeviceObject == TCPDeviceObject)
Protocol = IPPROTO_TCP;
else if (DeviceObject == UDPDeviceObject)
Protocol = IPPROTO_UDP;
else if (DeviceObject == IPDeviceObject)
Protocol = IPPROTO_RAW;
else if (DeviceObject == RawIPDeviceObject)
// Raw IP: the protocol number is taken from the file name
// NOTE(review): this Status is overwritten below without being checked
Status = TiGetProtocolNumber(&IrpSp->FileObject->FileName, &Protocol);
else
{
ExFreePoolWithTag(Context, TRANS_CONTEXT_TAG);
return STATUS_INVALID_PARAMETER;
}
// Key step: create a transport-layer address object; it is returned through Request
Status = FileOpenAddress(&Request, Address, Protocol, NULL);
if (NT_SUCCESS(Status))
{ // FsContext2 marks Context->Handle as a transport address
IrpSp->FileObject->FsContext2 = (PVOID)TDI_TRANSPORT_ADDRESS_FILE;
Context->Handle.AddressHandle = Request.Handle.AddressHandle;// remember the address object in the file's context
}
}
// Case 2: the EA name is TdiConnectionContext - a request to open a connection endpoint
else if (EaInfo && (EaInfo->EaNameLength == TDI_CONNECTION_CONTEXT_LENGTH) &&
(RtlCompareMemory (&EaInfo->EaName, TdiConnectionContext,
TDI_CONNECTION_CONTEXT_LENGTH) == TDI_CONNECTION_CONTEXT_LENGTH))
{
// Connection endpoints are only accepted on the TCP device
if (DeviceObject != TCPDeviceObject)
{
ExFreePoolWithTag(Context, TRANS_CONTEXT_TAG);
return STATUS_INVALID_PARAMETER;
}
// NOTE(review): unlike the address case there is no "+ 1" in this offset - confirm which layout the sender uses
ClientContext = *((PVOID*)(EaInfo->EaName + EaInfo->EaNameLength));
Status = FileOpenConnection(&Request, ClientContext);
if (NT_SUCCESS(Status))
{
IrpSp->FileObject->FsContext2 = (PVOID)TDI_CONNECTION_FILE;
Context->Handle.ConnectionContext = Request.Handle.ConnectionContext;
}
}
else …
Irp->IoStatus.Status = Status;
return Status;
}
/*
 * FileOpenAddress - create a transport-layer address object.
 *
 * Allocates and initialises an ADDRESS_FILE for the requested IPv4 address
 * and protocol, reserves a port for TCP/UDP, selects the protocol's send
 * routine, and links the object into the global address file list.
 *
 * Parameters:
 *   Request  - out: Request->Handle.AddressHandle receives the new object.
 *   Address  - IP address and port to bind.
 *   Protocol - IP protocol number (TCP, UDP, ICMP; anything else is raw IP).
 *   Options  - not used in the visible code.
 *
 * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES if the object cannot
 * be allocated, STATUS_INVALID_ADDRESS if no interface owns the address, or
 * STATUS_ADDRESS_ALREADY_EXISTS if the port cannot be reserved.
 */
NTSTATUS FileOpenAddress(
    PTDI_REQUEST Request,
    PTA_IP_ADDRESS Address,
    USHORT Protocol,
    PVOID Options)
{
    PADDRESS_FILE AddrFile;

    /* Key step: each AFD socket gets one transport address object to bind to. */
    AddrFile = ExAllocatePoolWithTag(NonPagedPool, sizeof(ADDRESS_FILE), ADDR_FILE_TAG);
    if (!AddrFile)
    {
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    RtlZeroMemory(AddrFile, sizeof(ADDRESS_FILE));
    AddrFile->RefCount = 1;
    AddrFile->Free = AddrFileFree;
    AddrFile->TTL = 128;
    AddrFile->DF = 0;
    AddrFile->BCast = 1;
    AddrFile->HeaderIncl = 1;
    AddrFile->Family = Address->Address[0].AddressType;
    AddrFile->Address.Address.IPv4Address = Address->Address[0].Address[0].in_addr;
    AddrFile->Address.Type = IP_ADDRESS_V4;

    /* Reject an address that is neither unspecified nor owned by any interface. */
    if (!AddrIsUnspecified(&AddrFile->Address) && !AddrLocateInterface(&AddrFile->Address))
    {
        ExFreePoolWithTag(AddrFile, ADDR_FILE_TAG);
        return STATUS_INVALID_ADDRESS;
    }

    switch (Protocol)
    {
    case IPPROTO_TCP:
        /* Check for a port conflict, or pick a free TCP port. */
        AddrFile->Port = TCPAllocatePort(Address->Address[0].Address[0].sin_port);
        if ((Address->Address[0].Address[0].sin_port &&
             AddrFile->Port != Address->Address[0].Address[0].sin_port) ||
            AddrFile->Port == 0xffff)
        {
            ExFreePoolWithTag(AddrFile, ADDR_FILE_TAG);
            return STATUS_ADDRESS_ALREADY_EXISTS;
        }
        AddEntity(CO_TL_ENTITY, AddrFile, CO_TL_TCP);
        AddrFile->Send = NULL; /* TCPSendData */
        break;

    case IPPROTO_UDP:
        /* Check for a port conflict, or pick a free UDP port. */
        AddrFile->Port = UDPAllocatePort(Address->Address[0].Address[0].sin_port);
        if ((Address->Address[0].Address[0].sin_port &&
             AddrFile->Port != Address->Address[0].Address[0].sin_port) ||
            AddrFile->Port == 0xffff)
        {
            ExFreePoolWithTag(AddrFile, ADDR_FILE_TAG);
            return STATUS_ADDRESS_ALREADY_EXISTS;
        }
        AddEntity(CL_TL_ENTITY, AddrFile, CL_TL_UDP);
        AddrFile->Send = UDPSendDatagram;
        break;

    case IPPROTO_ICMP:
        AddrFile->Port = 0;
        AddrFile->Send = ICMPSendDatagram;
        AddEntity(ER_ENTITY, AddrFile, ER_ICMP);
        break;

    default:
        /* Use raw IP for all other protocols */
        AddrFile->Port = 0;
        AddrFile->Send = RawIPSendDatagram;
        AddEntity(CL_TL_ENTITY, AddrFile, 0);
        break;
    }

    AddrFile->Protocol = Protocol;
    InitializeListHead(&AddrFile->ReceiveQueue);   /* queue of pending receive requests */
    InitializeListHead(&AddrFile->TransmitQueue);  /* queue of pending send requests */
    KeInitializeSpinLock(&AddrFile->Lock);

    /* Hand the new address object back to the caller. */
    Request->Handle.AddressHandle = AddrFile;

    /*
     * Key step: publish the object on the driver's global address file list.
     * Per the original commentary, this list is what lets an incoming packet
     * be matched to an address object by protocol, destination address and
     * destination port.
     */
    ExInterlockedInsertTailList(&AddressFileListHead, &AddrFile->ListEntry, &AddressFileListLock);

    return STATUS_SUCCESS;
}
看看UDP是如何检测、分配端口号的
/*
 * UDPAllocatePort - reserve a UDP port.
 *
 * HintPort != 0: reserve exactly that port; returns HintPort when
 *                AllocatePort succeeds, (UINT)-1 otherwise.
 * HintPort == 0: returns whatever AllocatePortFromRange picks from
 *                [UDP_STARTING_PORT, UDP_STARTING_PORT + UDP_DYNAMIC_PORTS).
 */
UINT UDPAllocatePort( UINT HintPort )
{
    if (HintPort == 0)
    {
        /* No explicit port requested: take one from the dynamic range. */
        return AllocatePortFromRange(&UDPPorts,
                                     UDP_STARTING_PORT,
                                     UDP_STARTING_PORT + UDP_DYNAMIC_PORTS);
    }

    /* Explicit port requested: it must be available. */
    return AllocatePort(&UDPPorts, HintPort) ? HintPort : (UINT)-1;
}
看了这么多,总结一下:套接字的绑定过程,实际上是将afd中创建的套接字文件对象绑定到tcpip传输层驱动中创建的地址对象。我们可以简单理解为:afd中的套接字 绑定 传输层中的地址对象。
其本质是通过afd给传输层设备发送一个“创绑地址文件对象请求”实现的。
一个udp套接字在绑定了传输层地址后,就可以开始收发数据报了,不用事先建立连接。我们看看udp报文的发送过程:
/*
 * sendto - send one buffer to the destination address `to`.
 *
 * Wraps the buffer in a single WSABUF and forwards to WSASendTo in
 * synchronous mode (no overlapped structure, no completion routine).
 *
 * Returns the number of bytes sent, or SOCKET_ERROR if WSASendTo reports a
 * non-zero result.
 */
INT
sendto(IN SOCKET s,
       IN CONST CHAR FAR* buf,
       IN INT len,
       IN INT flags,
       IN CONST struct sockaddr *to,   /* destination ip:port */
       IN INT tolen)
{
    DWORD Error;
    DWORD BytesSent;
    WSABUF WSABuf;

    WSABuf.len = len;
    /* WSABUF.buf is declared non-const in the Winsock headers; cast explicitly. */
    WSABuf.buf = (CHAR FAR*)buf;

    Error = WSASendTo(s, &WSABuf, 1, &BytesSent, flags, to, tolen, NULL, NULL);
    if (Error)
        return SOCKET_ERROR;

    return BytesSent;
}
/*
 * WSASendTo - Winsock entry point for sending to a specific destination.
 *
 * Looks up the service provider that owns the socket and forwards the call
 * to its lpWSPSendTo entry (per the original note, WSPSendTo for the TCP/IP
 * provider), passing NULL as the thread id and a local variable to receive
 * the provider's error code.
 *
 * Returns the provider's result. The last-error value is set to the
 * provider's error code on SOCKET_ERROR and cleared to 0 otherwise.
 */
INT
WSASendTo(IN SOCKET s,
          IN LPWSABUF lpBuffers,
          IN DWORD dwBufferCount,
          OUT LPDWORD lpNumberOfBytesSent,
          IN DWORD dwFlags,
          IN CONST struct sockaddr *lpTo,
          IN INT iToLen,
          IN LPWSAOVERLAPPED lpOverlapped,
          IN LPWSAOVERLAPPED_COMPLETION_ROUTINE lpCompletionRoutine)
{
    PCATALOG_ENTRY Provider;
    INT Errno;
    INT Code;

    /* Resolve the service provider for this socket. */
    ReferenceProviderByHandle((HANDLE)s, &Provider);

    Code = Provider->ProcTable.lpWSPSendTo(s, lpBuffers, dwBufferCount, lpNumberOfBytesSent,
                                           dwFlags, (CONST LPSOCKADDR)lpTo, iToLen,
                                           lpOverlapped, lpCompletionRoutine,
                                           NULL,      /* lpThreadId */
                                           &Errno);   /* lpErrno */

    DereferenceProviderByPointer(Provider);

    if (Code == SOCKET_ERROR)
        WSASetLastError(Errno);
    else
        WSASetLastError(0);

    return Code;
}
/*
 * WSPSendTo - provider-side implementation of sendto()/WSASendTo().
 *
 * (Excerpt: most local declarations are elided in this listing.)
 *
 * If the socket is still in the SocketOpen state it is first bound to the
 * helper DLL's wildcard address. The destination sockaddr is then converted
 * into a transport address and an IOCTL_AFD_SEND_DATAGRAM request is sent to
 * the socket handle. With lpOverlapped == NULL the call waits on an internal
 * event until the request has completed.
 *
 * Every temporary acquired here (wildcard bind address, transport address,
 * internal event) is released on all paths before returning.
 *
 * Returns the value of MsafdReturnWithErrno for the final status, or
 * SOCKET_ERROR on an early failure.
 */
int
WSPSendTo(SOCKET Handle,
          LPWSABUF lpBuffers,                 /* array of buffers */
          DWORD dwBufferCount,                /* number of buffers */
          LPDWORD lpNumberOfBytesSent,        /* out */
          DWORD iFlags,
          const struct sockaddr *SocketAddress,   /* destination ip:port */
          int SocketAddressLength,
          LPWSAOVERLAPPED lpOverlapped,       /* NULL selects synchronous mode */
          LPWSAOVERLAPPED_COMPLETION_ROUTINE lpCompletionRoutine,
          LPWSATHREADID lpThreadId,
          LPINT lpErrno)
{
    HANDLE Event = NULL;
    PSOCKADDR BindAddress = NULL;

    Socket = GetSocketStructure(Handle);

    /* Not bound yet: bind to the wildcard address first. */
    if (Socket->SharedData.State == SocketOpen)
    {
        BindAddressLength = Socket->HelperData->MaxWSAddressLength;
        BindAddress = HeapAlloc(GlobalHeap, 0, BindAddressLength);
        if (BindAddress == NULL)
            return MsafdReturnWithErrno(STATUS_INSUFFICIENT_RESOURCES, lpErrno, 0, NULL);

        Socket->HelperData->WSHGetWildcardSockaddr(Socket->HelperContext,
                                                   BindAddress, &BindAddressLength);

        if (WSPBind(Handle, BindAddress, BindAddressLength, lpErrno) == SOCKET_ERROR)
        {
            HeapFree(GlobalHeap, 0, BindAddress);
            return SOCKET_ERROR;
        }
    }

    RemoteAddress = HeapAlloc(GlobalHeap, 0, 0x6 + SocketAddressLength);
    if (RemoteAddress == NULL)
    {
        if (BindAddress != NULL)
            HeapFree(GlobalHeap, 0, BindAddress);
        return MsafdReturnWithErrno(STATUS_INSUFFICIENT_RESOURCES, lpErrno, 0, NULL);
    }

    Status = NtCreateEvent(&SockEvent, GENERIC_READ | GENERIC_WRITE, NULL, 1, FALSE);
    if (!NT_SUCCESS(Status))
    {
        HeapFree(GlobalHeap, 0, RemoteAddress);
        if (BindAddress != NULL)
            HeapFree(GlobalHeap, 0, BindAddress);
        return MsafdReturnWithErrno(Status, lpErrno, 0, NULL);
    }

    /* Wrap the destination address in TDI format. */
    RemoteAddress->TAAddressCount = 1;
    RemoteAddress->Address[0].AddressLength = SocketAddressLength - sizeof(SocketAddress->sa_family);
    RtlCopyMemory(&RemoteAddress->Address[0].AddressType, SocketAddress, SocketAddressLength);

    SendInfo.BufferArray = (PAFD_WSABUF)lpBuffers;
    SendInfo.AfdFlags = Socket->SharedData.NonBlocking ? AFD_IMMEDIATE : 0;
    SendInfo.BufferCount = dwBufferCount;
    SendInfo.TdiConnection.RemoteAddress = RemoteAddress;
    SendInfo.TdiConnection.RemoteAddressLength = Socket->HelperData->MaxTDIAddressLength;

    if (lpOverlapped == NULL)
    {
        /* Synchronous send: use the internal event and a dummy status block. */
        APCContext = NULL;
        APCFunction = NULL;
        Event = SockEvent;
        IOSB = &DummyIOSB;
    }
    else
    {
        if (lpCompletionRoutine == NULL)
        {
            /* Overlapped send signalled through the caller's event. */
            APCContext = lpOverlapped;
            APCFunction = NULL;
            Event = lpOverlapped->hEvent;
        }
        else
        {
            /* Completion-routine send; per the original note the routine acts as an APC. */
            APCFunction = NULL;
            APCContext = lpCompletionRoutine;
            SendInfo.AfdFlags |= AFD_SKIP_FIO;
        }

        /* The status block lives inside the caller's overlapped structure. */
        IOSB = (PIO_STATUS_BLOCK)&lpOverlapped->Internal;
        SendInfo.AfdFlags |= AFD_OVERLAPPED;
    }

    /* Key step: ask the AFD socket device to send the datagram. */
    Status = NtDeviceIoControlFile((HANDLE)Handle,
                                   Event, APCFunction, APCContext, IOSB,
                                   IOCTL_AFD_SEND_DATAGRAM,
                                   &SendInfo, sizeof(SendInfo), NULL, 0);

    /* Synchronous callers wait here until the request has completed. */
    if (Status == STATUS_PENDING && lpOverlapped == NULL)
    {
        WaitForSingleObject(SockEvent, INFINITE);
        Status = IOSB->Status;
    }

    /* Release everything that was acquired for this call. */
    NtClose(SockEvent);
    HeapFree(GlobalHeap, 0, RemoteAddress);
    if (BindAddress != NULL)
        HeapFree(GlobalHeap, 0, BindAddress);

    if (Status != STATUS_PENDING)
        SockReenableAsyncSelectEvent(Socket, FD_WRITE);

    return MsafdReturnWithErrno(Status, lpErrno, IOSB->Information, lpNumberOfBytesSent);
}
看看afd驱动是如何处理应用层发下来的udp报文发送请求这种irp的
/*
 * AfdPacketSocketWriteData - AFD handler for the datagram send request IRP.
 *
 * Requires the socket to be in SOCKET_STATE_BOUND. Locks the request and its
 * buffer array, builds a TDI connection-information block for the remote
 * address, and forwards the first buffer to the transport through
 * TdiSendDatagram on the socket's address file. The IRP is then completed
 * with the length of the first buffer as its information value.
 */
NTSTATUS
AfdPacketSocketWriteData(PDEVICE_OBJECT DeviceObject, PIRP Irp,PIO_STACK_LOCATION IrpSp)
{
NTSTATUS Status = STATUS_SUCCESS;
PTDI_CONNECTION_INFORMATION TargetAddress;
PFILE_OBJECT FileObject = IrpSp->FileObject;
PAFD_FCB FCB = FileObject->FsContext;
PAFD_SEND_INFO_UDP SendReq;
ULONG Information;
if( !SocketAcquireStateLock( FCB ) ) return LostSocket( Irp );
// Sending is only allowed on a bound socket
if( FCB->State != SOCKET_STATE_BOUND )
return UnlockAndMaybeComplete ( FCB, STATUS_INVALID_PARAMETER, Irp, 0 );
if( !(SendReq = LockRequest( Irp, IrpSp )) )
return UnlockAndMaybeComplete ( FCB, STATUS_NO_MEMORY, Irp, 0 );
SendReq->BufferArray = LockBuffers( SendReq->BufferArray,SendReq->BufferCount,
NULL, NULL,FALSE, FALSE );
if( !SendReq->BufferArray )
return UnlockAndMaybeComplete( FCB, STATUS_ACCESS_VIOLATION,Irp, 0 );
Status = TdiBuildConnectionInfo( &TargetAddress, SendReq->TdiConnection.RemoteAddress );
if( NT_SUCCESS(Status) )
{
FCB->PollState &= ~AFD_EVENT_SEND;
// Key step: build a TDI IRP and send it to the transport device; Status is the transport's result
Status = TdiSendDatagram
( &FCB->SendIrp.InFlightRequest,
FCB->AddressFile.Object,// the bound address file
SendReq->BufferArray[0].buf,SendReq->BufferArray[0].len,
TargetAddress,// destination IP:port
&FCB->SendIrp.Iosb, PacketSocketSendComplete, FCB );
ExFreePool( TargetAddress );
}
if( Status == STATUS_PENDING )
Status = STATUS_SUCCESS;// a still-pending transport send is reported as success (the original author questions this)
// NOTE(review): the socket IRP is completed right below, so this path never returns STATUS_PENDING;
// the original author flags this as a likely problem for asynchronous sends.
Information = SendReq->BufferArray[0].len;
UnlockBuffers(SendReq->BufferArray, SendReq->BufferCount, FALSE);
return UnlockAndMaybeComplete( FCB, Status, Irp, Information );
}
如上,afd驱动处理udp报文发送请求irp时,仅仅是将该irp转换成tdi irp,发给下层的传输层驱动而已。
/*
 * TdiSendDatagram - build a TDI_SEND_DATAGRAM IRP and send it to the transport.
 *
 * Parameters:
 *   Irp               - out: receives the IRP that was built (NULL on failure).
 *   TransportObject   - address file object in the transport driver.
 *   Buffer            - data to send.
 *   BufferLength      - number of bytes in Buffer.
 *   Addr              - destination address.
 *   Iosb              - status block for the request.
 *   CompletionRoutine - completion routine set on the IRP.
 *   CompletionContext - context passed to the completion routine.
 *
 * Returns STATUS_INSUFFICIENT_RESOURCES if the IRP or MDL cannot be
 * allocated or the buffer cannot be locked, otherwise the status of TdiCall.
 */
NTSTATUS TdiSendDatagram(
    PIRP *Irp,
    PFILE_OBJECT TransportObject,
    PCHAR Buffer,
    UINT BufferLength,
    PTDI_CONNECTION_INFORMATION Addr,
    PIO_STATUS_BLOCK Iosb,
    PIO_COMPLETION_ROUTINE CompletionRoutine,
    PVOID CompletionContext)
{
    PDEVICE_OBJECT DeviceObject;
    NTSTATUS Status;
    PMDL Mdl;

    /* Device object that owns the address file. */
    DeviceObject = IoGetRelatedDeviceObject(TransportObject);

    *Irp = TdiBuildInternalDeviceControlIrp(TDI_SEND_DATAGRAM, DeviceObject,
                                            TransportObject, NULL, Iosb);
    if (*Irp == NULL)
        return STATUS_INSUFFICIENT_RESOURCES;

    Mdl = IoAllocateMdl(Buffer, BufferLength, FALSE, FALSE, NULL);
    if (Mdl == NULL)
    {
        IoCompleteRequest(*Irp, IO_NO_INCREMENT);
        *Irp = NULL;
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    _SEH2_TRY {
        /* Lock the buffer pages in memory. */
        MmProbeAndLockPages(Mdl, (*Irp)->RequestorMode, IoModifyAccess);
    } _SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER) {
        IoFreeMdl(Mdl);
        IoCompleteRequest(*Irp, IO_NO_INCREMENT);
        *Irp = NULL;
        _SEH2_YIELD(return STATUS_INSUFFICIENT_RESOURCES);
    } _SEH2_END;

    /* Fill in the send-datagram request. */
    TdiBuildSendDatagram(*Irp, DeviceObject, TransportObject,
                         CompletionRoutine, CompletionContext, Mdl, BufferLength, Addr);

    /* Hand the TDI IRP to the transport device. */
    Status = TdiCall(*Irp, DeviceObject, NULL, Iosb);
    return Status;
}
如上,afd层会将发下来的socket irp 转换成tdi irp 发给下层的传输层驱动。
/*
 * TdiCall - pass an IRP to a device and optionally wait for it.
 *
 * When the driver returns STATUS_PENDING and an event was supplied, waits on
 * that event and returns the status from `Iosb`. In every other case the
 * status returned by IoCallDriver is passed back unchanged.
 */
NTSTATUS TdiCall(PIRP Irp,PDEVICE_OBJECT DeviceObject,PKEVENT Event,PIO_STATUS_BLOCK Iosb)
{
    NTSTATUS Result = IoCallDriver(DeviceObject, Irp);

    /* Nothing to wait for: not pending, or the caller supplied no event. */
    if (Result != STATUS_PENDING || Event == NULL)
    {
        return Result;
    }

    KeWaitForSingleObject(Event, Executive, KernelMode, FALSE, NULL);
    return Iosb->Status;
}
下面看看传输层处理这种irp的过程
/*
 * DispTdiSendDatagram - transport-layer handler for a send-datagram IRP.
 *
 * Reads the send parameters from the current stack location, takes the
 * address object from the file's transport context, prepares the IRP for
 * cancellation, and calls the address file's Send routine with the buffer
 * described by the IRP's MDL. Unless the result is STATUS_PENDING the IRP is
 * completed here through DispDataRequestComplete.
 */
NTSTATUS DispTdiSendDatagram(PIRP Irp) // transport-layer handler for the send-datagram IRP
{
PIO_STACK_LOCATION IrpSp;
TDI_REQUEST Request;
PTDI_REQUEST_KERNEL_SENDDG DgramInfo;
PTRANSPORT_CONTEXT TranContext;
NTSTATUS Status;
IrpSp = IoGetCurrentIrpStackLocation(Irp);
DgramInfo = (PTDI_REQUEST_KERNEL_SENDDG)&(IrpSp->Parameters);
TranContext = IrpSp->FileObject->FsContext;// per-file transport context
Request.Handle.AddressHandle = TranContext->Handle.AddressHandle;// address object of this file
Request.RequestNotifyObject = DispDataRequestComplete;
Request.RequestContext = Irp;
Status = DispPrepareIrpForCancel(IrpSp->FileObject->FsContext,
Irp, (PDRIVER_CANCEL)DispCancelRequest);
if (NT_SUCCESS(Status))
{
PVOID DataBuffer;
UINT BufferSize;
// Query the address and length of the buffer described by the MDL
NdisQueryBuffer( (PNDIS_BUFFER)Irp->MdlAddress,&DataBuffer,&BufferSize );
if( (*((PADDRESS_FILE)Request.Handle.AddressHandle)->Send != NULL) )
{
ULONG DataUsed = 0;
// For a UDP address file Send is UDPSendDatagram (assigned in FileOpenAddress).
// Per the original commentary it picks the outgoing interface from the destination address.
Status = (*((PADDRESS_FILE)Request.Handle.AddressHandle)->Send)(
Request.Handle.AddressHandle,// source: the local address object
DgramInfo->SendDatagramInformation,// destination address
DataBuffer,BufferSize,&DataUsed);
Irp->IoStatus.Information = DataUsed;
}
else Status = STATUS_UNSUCCESSFUL;
}
// NOTE(review): no goto in the visible code targets this label
done:
if (Status != STATUS_PENDING)
DispDataRequestComplete(Irp, Status, Irp->IoStatus.Information);
else
IoMarkIrpPending(Irp);
return Status;
}
下面是UDP协议处理udp报文发送请求irp的函数
/*
 * UDPSendDatagram - UDP send routine of an address file (excerpt: the branch
 * for an explicitly specified source address is elided in this listing).
 *
 * Extracts the IPv4 destination address and port from ConnInfo, chooses the
 * local address (the unicast address of the routed interface when the address
 * file is bound to the unspecified address), builds a UDP packet and passes
 * it to IPSendDatagram for the chosen neighbor.
 *
 * Returns STATUS_UNSUCCESSFUL for a non-IP address type, the failing status
 * of IPSendDatagram, or STATUS_SUCCESS.
 *
 * NOTE(review): in the visible code the result of RouteGetRouteToDestination
 * is dereferenced without a NULL check, and the Status of BuildUDPPacket is
 * overwritten without being examined - confirm against the full source.
 */
NTSTATUS UDPSendDatagram(
PADDRESS_FILE AddrFile,// source: local address object
PTDI_CONNECTION_INFORMATION ConnInfo,// destination address
PCHAR BufferData,
ULONG DataSize,
PULONG DataUsed )
{
IP_PACKET Packet;
PTA_IP_ADDRESS RemoteAddressTa = (PTA_IP_ADDRESS)ConnInfo->RemoteAddress;
IP_ADDRESS RemoteAddress;
IP_ADDRESS LocalAddress;
USHORT RemotePort;
NTSTATUS Status;
PNEIGHBOR_CACHE_ENTRY NCE;
KIRQL OldIrql;
LockObject(AddrFile, &OldIrql);
// Only IPv4 destination addresses are handled
switch( RemoteAddressTa->Address[0].AddressType ) {
case TDI_ADDRESS_TYPE_IP:
RemoteAddress.Type = IP_ADDRESS_V4;
RemoteAddress.Address.IPv4Address = RemoteAddressTa->Address[0].Address[0].in_addr;
RemotePort = RemoteAddressTa->Address[0].Address[0].sin_port;
break;
default:
UnlockObject(AddrFile, OldIrql);
return STATUS_UNSUCCESSFUL;
}
LocalAddress = AddrFile->Address;
if (AddrIsUnspecified(&LocalAddress))// no source address given (bound to the all-zero wildcard address)
{
// Look up the neighbor to send through for this destination; the neighbor determines the outgoing interface
NCE = RouteGetRouteToDestination( &RemoteAddress );
LocalAddress = NCE->Interface->Unicast;// unicast IP address of that interface
}
Else 。。。// elided in the original: the caller specified a source address explicitly (described as rare)
// Build the UDP packet, i.e. prepend a UDP header
Status = BuildUDPPacket( AddrFile,&Packet,&RemoteAddress,RemotePort,
&LocalAddress,AddrFile->Port,BufferData,DataSize );
UnlockObject(AddrFile, OldIrql);
// Pass the packet to IPSendDatagram for the chosen neighbor. Per the original commentary, that
// routine queues the packet on the neighbor's send queue and the miniport driver later writes it
// to the adapter's hardware transmit buffer.
if (!NT_SUCCESS(Status = IPSendDatagram( &Packet, NCE, UDPSendPacketComplete, NULL )))
{
FreeNdisPacket(Packet.NdisPacket);
return Status;
}
return STATUS_SUCCESS;
}
邻接点是什么意思呢?我们知道,目标机器可能很远很远,也可能与主机位于同一局域网中。主机发包时不可能直接发送给远程机器,中间经过的第一个路由器就是邻接点。而主机可以安装多块网卡,分属多个不同的局域网,通过多个路由器(网关)连向Internet。因此,发包时,必须算出我们的直接邻接点,首先将包(实际上是帧)发给它。当目标机器就是同局域网内的另外一台机器时,主机就可以直接发给他,不经由路由器,那么,目标机器就是邻接点。当目标机器很远很远不在同一局域网时,就必须选择一个路由器将报文转发出去,这个路由器此时就是我们的邻接点。总之,凡是主机直接相连(指可直接到达)的那些计算机、路由器都叫邻接点。
/* Describes one neighbor: a host or router reachable directly from this machine (per the surrounding text). */
typedef struct NEIGHBOR_CACHE_ENTRY { // neighbor descriptor
struct NEIGHBOR_CACHE_ENTRY *Next; // next entry
UCHAR State; // state of the neighbor
UINT EventTimer; /* Ticks since last event */
UINT EventCount; /* Number of events */
PIP_INTERFACE Interface; // key field: the interface through which this neighbor is reached
UINT LinkAddressLength; // length of LinkAddress; typically 6 bytes per the original note
PVOID LinkAddress; // link-layer (MAC) address of the neighbor
IP_ADDRESS Address; // IP address of the neighbor
LIST_ENTRY PacketQueue; // queue of packets waiting to be sent to this neighbor
} NEIGHBOR_CACHE_ENTRY, *PNEIGHBOR_CACHE_ENTRY;
上面的结构就是ARP协议的基础,它缓存记录了局域网内每个邻接点的IP地址与MAC地址的映射情况。
理解了udp报文的发送过程,再看一下udp报文的接收过程。RecvFrom这个API最终进入下面的函数
/*
 * WSPRecvFrom - provider-side implementation of recvfrom()/WSARecvFrom()
 * (excerpt: most local declarations are elided in this listing).
 *
 * Translates the Winsock receive flags into TDI flags, selects the event,
 * APC context and status block according to the overlapped arguments, and
 * sends IOCTL_AFD_RECV_DATAGRAM to the socket handle. With lpOverlapped ==
 * NULL it waits on an internal event until the request has completed. On
 * return *ReceiveFlags is rewritten from the final status.
 *
 * NOTE(review): the result of NtCreateEvent is not checked in the visible code.
 */
int
WSPRecvFrom(SOCKET Handle,
LPWSABUF lpBuffers,
DWORD dwBufferCount,
LPDWORD lpNumberOfBytesRead,
LPDWORD ReceiveFlags,
struct sockaddr *SocketAddress,
int *SocketAddressLength,
LPWSAOVERLAPPED lpOverlapped,
LPWSAOVERLAPPED_COMPLETION_ROUTINE lpCompletionRoutine,
LPWSATHREADID lpThreadId,
LPINT lpErrno )
{
HANDLE Event = NULL;
Socket = GetSocketStructure(Handle);
Status = NtCreateEvent( &SockEvent, GENERIC_READ | GENERIC_WRITE,NULL, 1, FALSE );
// Describe the receive request for AFD
RecvInfo.BufferArray = (PAFD_WSABUF)lpBuffers;
RecvInfo.BufferCount = dwBufferCount;
RecvInfo.TdiFlags = 0;
RecvInfo.AfdFlags = Socket->SharedData.NonBlocking ? AFD_IMMEDIATE : 0;
RecvInfo.AddressLength = SocketAddressLength;
RecvInfo.Address = SocketAddress;
// Map the Winsock MSG_* flags onto TDI receive flags
if (*ReceiveFlags == 0)
RecvInfo.TdiFlags |= TDI_RECEIVE_NORMAL;
else
{
if (*ReceiveFlags & MSG_OOB)
RecvInfo.TdiFlags |= TDI_RECEIVE_EXPEDITED;
if (*ReceiveFlags & MSG_PEEK)
RecvInfo.TdiFlags |= TDI_RECEIVE_PEEK;
if (*ReceiveFlags & MSG_PARTIAL)// caller accepts a truncated datagram (per the original note)
RecvInfo.TdiFlags |= TDI_RECEIVE_PARTIAL;
}
if (lpOverlapped == NULL)
{
// Synchronous receive: internal event and dummy status block
APCContext = NULL;
APCFunction = NULL;
Event = SockEvent;
IOSB = &DummyIOSB;
}
else
{
if (lpCompletionRoutine == NULL)
{
// Overlapped receive signalled through the caller's event
APCContext = lpOverlapped;
APCFunction = NULL;
Event = lpOverlapped->hEvent;
}
else
{
// Completion-routine receive
APCFunction = NULL;
APCContext = lpCompletionRoutine;
RecvInfo.AfdFlags |= AFD_SKIP_FIO;
}
IOSB = (PIO_STATUS_BLOCK)&lpOverlapped->Internal;
RecvInfo.AfdFlags |= AFD_OVERLAPPED;
}
IOSB->Status = STATUS_PENDING;
// Send the receive request to the socket device
Status = NtDeviceIoControlFile((HANDLE)Handle,Event,APCFunction,APCContext,IOSB,
IOCTL_AFD_RECV_DATAGRAM,// receive a datagram
&RecvInfo,sizeof(RecvInfo),NULL,0);
// Synchronous callers wait here until the request has completed
if (Status == STATUS_PENDING && lpOverlapped == NULL)
{
WaitForSingleObject(SockEvent, INFINITE);
Status = IOSB->Status;
}
NtClose( SockEvent );
// Report how the data arrived back through *ReceiveFlags
*ReceiveFlags = 0;
switch (Status)
{
case STATUS_RECEIVE_EXPEDITED: *ReceiveFlags = MSG_OOB;
break;
case STATUS_RECEIVE_PARTIAL_EXPEDITED:
*ReceiveFlags = MSG_PARTIAL | MSG_OOB;
break;
case STATUS_RECEIVE_PARTIAL:
*ReceiveFlags = MSG_PARTIAL;
break;
}
/* Re-enable Async Event */
SockReenableAsyncSelectEvent(Socket, FD_READ);
return MsafdReturnWithErrno ( Status, lpErrno, IOSB->Information, lpNumberOfBytesRead );
}
看看afd驱动是如何处理‘接收请求’irp的
/*
 * AfdPacketSocketReadData - AFD handler for the datagram receive request IRP.
 *
 * Requires the socket to be in SOCKET_STATE_BOUND. Behaviour:
 *   - A datagram is already queued on the socket:
 *       * if it is larger than the caller's first buffer and the caller did
 *         not pass TDI_RECEIVE_PARTIAL, it is put back at the head of the
 *         queue and the IRP completes with STATUS_BUFFER_TOO_SMALL and the
 *         datagram length as information;
 *       * otherwise it is delivered through SatisfyPacketRecvRequest.
 *   - No datagram queued and AFD_IMMEDIATE set: completes with
 *     STATUS_CANT_WAIT.
 *   - No datagram queued otherwise: the IRP is parked on the socket's
 *     pending receive list and STATUS_PENDING is returned.
 *
 * A failure to lock the caller's buffers completes the IRP with
 * STATUS_ACCESS_VIOLATION, matching AfdPacketSocketWriteData.
 */
NTSTATUS
AfdPacketSocketReadData(PDEVICE_OBJECT DeviceObject, PIRP Irp,PIO_STACK_LOCATION IrpSp )
{
    NTSTATUS Status = STATUS_SUCCESS;
    PFILE_OBJECT FileObject = IrpSp->FileObject;
    PAFD_FCB FCB = FileObject->FsContext;
    PAFD_RECV_INFO_UDP RecvReq;
    PLIST_ENTRY ListEntry;
    PAFD_STORED_DATAGRAM DatagramRecv;

    if (!SocketAcquireStateLock(FCB)) return LostSocket(Irp);

    if (FCB->State != SOCKET_STATE_BOUND)
        return UnlockAndMaybeComplete(FCB, STATUS_INVALID_PARAMETER, Irp, 0);

    if (!(RecvReq = LockRequest(Irp, IrpSp)))
        return UnlockAndMaybeComplete(FCB, STATUS_NO_MEMORY, Irp, 0);

    RecvReq->BufferArray = LockBuffers(RecvReq->BufferArray, RecvReq->BufferCount,
                                       RecvReq->Address, RecvReq->AddressLength, TRUE, TRUE);
    if (!RecvReq->BufferArray)
        return UnlockAndMaybeComplete(FCB, STATUS_ACCESS_VIOLATION, Irp, 0);

    if (!IsListEmpty(&FCB->DatagramList))
    {
        /* A datagram is already waiting: take it off the queue. */
        ListEntry = RemoveHeadList(&FCB->DatagramList);
        DatagramRecv = CONTAINING_RECORD(ListEntry, AFD_STORED_DATAGRAM, ListEntry);

        if (DatagramRecv->Len > RecvReq->BufferArray[0].len &&
            !(RecvReq->TdiFlags & TDI_RECEIVE_PARTIAL))
        {
            /* Too large and truncation not allowed: put it back and report the size. */
            InsertHeadList(&FCB->DatagramList, &DatagramRecv->ListEntry);
            Status = Irp->IoStatus.Status = STATUS_BUFFER_TOO_SMALL;
            Irp->IoStatus.Information = DatagramRecv->Len;
        }
        else
        {
            /* The buffer is large enough, or truncation is allowed. */
            Status = SatisfyPacketRecvRequest(FCB, Irp, DatagramRecv,
                                              (PUINT)&Irp->IoStatus.Information);
        }

        /* Keep the poll state in step with what is still queued. */
        if (!IsListEmpty(&FCB->DatagramList))
        {
            FCB->PollState |= AFD_EVENT_RECEIVE;
            FCB->PollStatus[FD_READ_BIT] = STATUS_SUCCESS;
            PollReeval(FCB->DeviceExt, FCB->FileObject);
        }
        else
        {
            FCB->PollState &= ~AFD_EVENT_RECEIVE;
        }

        UnlockBuffers(RecvReq->BufferArray, RecvReq->BufferCount, TRUE);
        return UnlockAndMaybeComplete(FCB, Status, Irp, Irp->IoStatus.Information);
    }

    /* Nothing queued right now. */
    FCB->PollState &= ~AFD_EVENT_RECEIVE;

    if (RecvReq->AfdFlags & AFD_IMMEDIATE)
    {
        /* The caller asked not to wait: fail the request immediately. */
        Status = STATUS_CANT_WAIT;
        UnlockBuffers(RecvReq->BufferArray, RecvReq->BufferCount, TRUE);
        return UnlockAndMaybeComplete(FCB, Status, Irp, 0);
    }

    /* Park the IRP on the socket's pending receive list. */
    return LeaveIrpUntilLater(FCB, Irp, FUNCTION_RECV);
}
/*
 * LeaveIrpUntilLater - park an IRP that cannot be satisfied right now.
 *
 * Appends the IRP to the FCB's pending list selected by `Function`, marks it
 * pending, installs AfdCancelHandler as its cancel routine, releases the
 * socket state lock and returns STATUS_PENDING.
 */
NTSTATUS LeaveIrpUntilLater( PAFD_FCB FCB, PIRP Irp, UINT Function )
{
InsertTailList( &FCB->PendingIrpList[Function],&Irp->Tail.Overlay.ListEntry );// queue the IRP
IoMarkIrpPending(Irp);
(void)IoSetCancelRoutine(Irp, AfdCancelHandler);
SocketStateUnlock( FCB );
return STATUS_PENDING;
}
如上,当应用程序调用RecvFrom向套接字发出收包请求时,afd驱动会检查那个套接字的收包队列中是否有包,若有,就满足请求立即返回,否则,将irp挂入队列等待以后完成该irp请求。那么,什么时候该irp会满足完成呢?要想得到这个问题的答案,回顾一下,当网卡收到包时,会触发中断进入小端口驱动的isr,最后小端口驱动会将包上交给各个绑定协议。当上交给tcpip时,tcpip会根据报文头部的协议、目标ip地址、目标端口号找到所有符合条件的传输层地址对象,然后调用DGDeliverData函数将报文投递给那些符合条件的地址对象。
/*
 * DGDeliverData - deliver a received datagram to one address file
 * (excerpt: local declarations, the unlock, and the final else branch are
 * elided in this listing).
 *
 * Chooses the payload to deliver (for non-UDP address files the IP header is
 * included when HeaderIncl is set, otherwise the header size is subtracted
 * from DataSize). If the address file has pending receive requests and the
 * destination port/address match, the first request is removed from the
 * queue, filled with the data and the sender's address, and completed with
 * STATUS_SUCCESS, or with STATUS_BUFFER_OVERFLOW when its buffer is smaller
 * than the datagram. Only one request is satisfied per call.
 */
VOID DGDeliverData(
    PADDRESS_FILE AddrFile,   /* address file the datagram is delivered to */
    PIP_ADDRESS SrcAddress,   /* source address from the packet header */
    PIP_ADDRESS DstAddress,   /* destination address from the packet header */
    USHORT SrcPort,           /* source port from the packet header */
    USHORT DstPort,           /* destination port from the packet header */
    PIP_PACKET IPPacket,      /* packet handed up by the lower layer */
    UINT DataSize)            /* length of the data */
{
    LockObject(AddrFile, &OldIrql);

    if (AddrFile->Protocol == IPPROTO_UDP)
        DataBuffer = IPPacket->Data;
    else
    {
        if (AddrFile->HeaderIncl)
            DataBuffer = IPPacket->Header;
        else
        {
            DataBuffer = IPPacket->Data;
            DataSize -= IPPacket->HeaderSize;
        }
    }

    /* Pending receive requests exist: use the packet to satisfy one of them. */
    if (!IsListEmpty(&AddrFile->ReceiveQueue))
    {
        PLIST_ENTRY CurrentEntry;
        PDATAGRAM_RECEIVE_REQUEST Current = NULL;
        PTA_IP_ADDRESS RTAIPAddress;

        CurrentEntry = AddrFile->ReceiveQueue.Flink;
        while (CurrentEntry != &AddrFile->ReceiveQueue)
        {
            Current = CONTAINING_RECORD(CurrentEntry, DATAGRAM_RECEIVE_REQUEST, ListEntry);
            CurrentEntry = CurrentEntry->Flink;

            if (DstPort == AddrFile->Port && (AddrIsEqual(DstAddress, &AddrFile->Address) ||
                AddrIsUnspecified(&AddrFile->Address) || AddrIsUnspecified(DstAddress)))
            {
                /* Take this request off the queue and fill it in. */
                RemoveEntryList(&Current->ListEntry);
                RtlCopyMemory(Current->Buffer, DataBuffer, MIN(Current->BufferSize, DataSize));

                /* Report the sender's address back to the requester. */
                RTAIPAddress = (PTA_IP_ADDRESS)Current->ReturnInfo->RemoteAddress;
                RTAIPAddress->TAAddressCount = 1;
                RTAIPAddress->Address->AddressType = TDI_ADDRESS_TYPE_IP;
                RTAIPAddress->Address->Address->sin_port = SrcPort;
                RtlCopyMemory(&RTAIPAddress->Address->Address->in_addr,
                              &SrcAddress->Address.IPv4Address,
                              sizeof(SrcAddress->Address.IPv4Address));

                /*
                 * Key step: run the request's completion routine. Per the
                 * original commentary, completing one request lets the upper
                 * layer post the next one, so the transport keeps receiving
                 * requests.
                 */
                if (Current->BufferSize < DataSize)
                    Current->Complete(Current->Context, STATUS_BUFFER_OVERFLOW, Current->BufferSize);
                else
                    Current->Complete(Current->Context, STATUS_SUCCESS, DataSize);

                break; /* satisfy exactly one receive request */
            }
        }
    }
    else … // elided in the original. Per the original commentary: with no pending receive request the packet is dropped, which is one reason UDP loses data even on a fault-free link when the receiver is slower than the sender.
}
看到没,传输层一收到udp报文,就会把报文满足给接收请求。每个地址对象内部维护着一个接收请求队列,当udp套接字一绑定地址对象时,就会立马向传输层发出一个接收请求,挂入相应地址对象的接收请求队列中,这就是第一个udp接收请求的产生时机(第一个接收请求是在AfdBindSocket函数内部调用TdiReceiveDatagram发出的)
回顾一下绑定过程:
/* Abridged excerpt of AfdBindSocket: only the step that posts the first datagram receive is shown. */
AfdBindSocket(PDEVICE_OBJECT DeviceObject, PIRP Irp,PIO_STACK_LOCATION IrpSp)
{ 。。。
if( FCB->Flags & AFD_ENDPOINT_CONNECTIONLESS )
{
Status = TdiReceiveDatagram
( &FCB->ReceiveIrp.InFlightRequest,// receives the IRP that is currently in flight
FCB->AddressFile.Object,// transport-layer address object the request is sent to
0,//flags
FCB->Recv.Window,// datagram receive buffer; per the original note an intermediate buffer that is reused
FCB->Recv.Size,// per the original note, always 16384 bytes by default
FCB->AddressFrom, &FCB->ReceiveIrp.Iosb,
PacketSocketRecvComplete,FCB );// key point: the completion routine
if( Status == STATUS_PENDING ) Status = STATUS_SUCCESS;
}
。。。
}
如上,刚一完成绑定,就立即向传输层对应的地址对象投递一个接收请求
/*
 * TdiReceiveDatagram - build a TDI_RECEIVE_DATAGRAM IRP and send it to the
 * transport.
 *
 * Parameters:
 *   Irp               - out: receives the IRP that was built (NULL on failure).
 *   TransportObject   - address file object in the transport driver.
 *   Flags             - receive flags passed to the transport.
 *   Buffer            - buffer that receives the datagram.
 *   BufferLength      - size of Buffer in bytes.
 *   Addr              - connection information, used both as the receive
 *                       information and as the return information.
 *   Iosb              - status block for the request.
 *   CompletionRoutine - completion routine set on the IRP.
 *   CompletionContext - context passed to the completion routine.
 *
 * Returns STATUS_INSUFFICIENT_RESOURCES if the IRP or MDL cannot be
 * allocated or the buffer cannot be locked, otherwise the status of TdiCall.
 */
NTSTATUS TdiReceiveDatagram(
    PIRP *Irp,
    PFILE_OBJECT TransportObject,
    USHORT Flags,
    PCHAR Buffer,
    UINT BufferLength,
    PTDI_CONNECTION_INFORMATION Addr,
    PIO_STATUS_BLOCK Iosb,
    PIO_COMPLETION_ROUTINE CompletionRoutine,
    PVOID CompletionContext)
{
    PDEVICE_OBJECT DeviceObject;
    NTSTATUS Status;
    PMDL Mdl;

    /* Device object that owns the address file. */
    DeviceObject = IoGetRelatedDeviceObject(TransportObject);

    *Irp = TdiBuildInternalDeviceControlIrp(TDI_RECEIVE_DATAGRAM, DeviceObject,
                                            TransportObject, NULL, Iosb);
    if (*Irp == NULL)
        return STATUS_INSUFFICIENT_RESOURCES;

    Mdl = IoAllocateMdl(Buffer, BufferLength, FALSE, FALSE, NULL);
    if (Mdl == NULL)
    {
        IoCompleteRequest(*Irp, IO_NO_INCREMENT);
        *Irp = NULL;
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    _SEH2_TRY {
        /* Lock the buffer pages in memory. */
        MmProbeAndLockPages(Mdl, (*Irp)->RequestorMode, IoModifyAccess);
    } _SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER) {
        IoFreeMdl(Mdl);
        IoCompleteRequest(*Irp, IO_NO_INCREMENT);
        *Irp = NULL;
        _SEH2_YIELD(return STATUS_INSUFFICIENT_RESOURCES);
    } _SEH2_END;

    /* Fill in the receive-datagram request. */
    TdiBuildReceiveDatagram(*Irp, DeviceObject, TransportObject,
                            CompletionRoutine, CompletionContext,
                            Mdl, BufferLength, Addr, Addr, Flags);

    /* Hand the IRP to the transport device. */
    Status = TdiCall(*Irp, DeviceObject, NULL, Iosb);
    return Status;
}
看看传输层是如何处理afd层发下来的udp报文接收请求的
/*
 * DispTdiReceiveDatagram - transport-layer handler for a receive-datagram IRP.
 *
 * Reads the receive parameters from the current stack location, takes the
 * address object from the file's transport context, prepares the IRP for
 * cancellation, and passes the request to DGReceiveDatagram with the buffer
 * described by the IRP's MDL. Unless the result is STATUS_PENDING the IRP is
 * completed here through DispDataRequestComplete.
 */
NTSTATUS DispTdiReceiveDatagram(PIRP Irp) // handler for the datagram receive request
{
PIO_STACK_LOCATION IrpSp;
PTDI_REQUEST_KERNEL_RECEIVEDG DgramInfo;
PTRANSPORT_CONTEXT TranContext;
TDI_REQUEST Request;
NTSTATUS Status;
ULONG BytesReceived = 0;
IrpSp = IoGetCurrentIrpStackLocation(Irp);
DgramInfo = (PTDI_REQUEST_KERNEL_RECEIVEDG)&(IrpSp->Parameters);
TranContext = IrpSp->FileObject->FsContext;
/* Initialize a receive request */
Request.Handle.AddressHandle = TranContext->Handle.AddressHandle;
Request.RequestNotifyObject = DispDataRequestComplete;
Request.RequestContext = Irp;
Status = DispPrepareIrpForCancel(IrpSp->FileObject->FsContext,Irp,
(PDRIVER_CANCEL)DispCancelRequest);
if (NT_SUCCESS(Status))
{
PVOID DataBuffer;
UINT BufferSize;
NdisQueryBuffer( (PNDIS_BUFFER)Irp->MdlAddress,&DataBuffer,&BufferSize );
Status = DGReceiveDatagram(
Request.Handle.AddressHandle,// address object on whose queue the receive request (not an IRP) is placed
DgramInfo->ReceiveDatagramInformation,
DataBuffer,// per the original note, the AFD receive window
DgramInfo->ReceiveLength,// per the original note, 16384 bytes
DgramInfo->ReceiveFlags,// per the original note, 0
DgramInfo->ReturnDatagramInformation,
&BytesReceived,
(PDATAGRAM_COMPLETION_ROUTINE)DispDataRequestComplete,
Irp,Irp);
}
// NOTE(review): no goto in the visible code targets this label
done:
if (Status != STATUS_PENDING)
DispDataRequestComplete(Irp, Status, BytesReceived);
else
IoMarkIrpPending(Irp);
return Status;
}
实际的处理工作在DGReceiveDatagram函数中,我们看
/*
 * DGReceiveDatagram - queue a datagram receive request on an address file.
 *
 * Allocates a DATAGRAM_RECEIVE_REQUEST, records the optional remote-address
 * filter, the destination buffer and the caller's completion routine and
 * context, and appends the request to AddrFile->ReceiveQueue while holding
 * the address file lock.
 *
 * Parameters:
 *   AddrFile      - address file whose receive queue takes the request.
 *   ConnInfo      - optional remote address filter.
 *   BufferData    - buffer that will receive the datagram.
 *   ReceiveLength - size of BufferData in bytes.
 *   ReceiveFlags  - not used in the visible code.
 *   ReturnInfo    - receives the sender's address on completion.
 *   BytesReceived - not written in the visible code.
 *   Complete      - caller's completion routine.
 *   Context       - context passed to Complete.
 *   Irp           - IRP that carries the request; marked pending when queued.
 *
 * Returns STATUS_PENDING once the request is queued,
 * STATUS_INSUFFICIENT_RESOURCES if it cannot be allocated, or the failing
 * status of AddrGetAddress. The lock is released on every path.
 */
NTSTATUS DGReceiveDatagram(
    PADDRESS_FILE AddrFile,
    PTDI_CONNECTION_INFORMATION ConnInfo,
    PCHAR BufferData,
    ULONG ReceiveLength,
    ULONG ReceiveFlags,
    PTDI_CONNECTION_INFORMATION ReturnInfo,
    PULONG BytesReceived,
    PDATAGRAM_COMPLETION_ROUTINE Complete,
    PVOID Context,
    PIRP Irp)
{
    NTSTATUS Status;
    PDATAGRAM_RECEIVE_REQUEST ReceiveRequest;
    KIRQL OldIrql;

    LockObject(AddrFile, &OldIrql);

    /* Key step: allocate the receive request (this is not an IRP). */
    ReceiveRequest = ExAllocatePoolWithTag(NonPagedPool,
                                           sizeof(DATAGRAM_RECEIVE_REQUEST),
                                           DATAGRAM_RECV_TAG);
    if (!ReceiveRequest)
    {
        UnlockObject(AddrFile, OldIrql);
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    /* Extract the remote address filter from the request, if there is one. */
    if ((ConnInfo->RemoteAddressLength != 0) && (ConnInfo->RemoteAddress))
    {
        Status = AddrGetAddress(ConnInfo->RemoteAddress,
                                &ReceiveRequest->RemoteAddress,
                                &ReceiveRequest->RemotePort);
        if (!NT_SUCCESS(Status))
        {
            ExFreePoolWithTag(ReceiveRequest, DATAGRAM_RECV_TAG);
            UnlockObject(AddrFile, OldIrql);
            return Status;
        }
    }
    else
    {
        ReceiveRequest->RemotePort = 0;
        AddrInitIPv4(&ReceiveRequest->RemoteAddress, 0);
    }

    IoMarkIrpPending(Irp);

    ReceiveRequest->ReturnInfo = ReturnInfo;
    ReceiveRequest->Buffer = BufferData;
    ReceiveRequest->BufferSize = ReceiveLength;
    ReceiveRequest->UserComplete = Complete;   /* caller's completion routine */
    ReceiveRequest->UserContext = Context;     /* caller's context */
    ReceiveRequest->Complete = (PDATAGRAM_COMPLETION_ROUTINE)DGReceiveComplete;
    ReceiveRequest->Context = ReceiveRequest;
    ReceiveRequest->AddressFile = AddrFile;
    ReceiveRequest->Irp = Irp;

    /* Key step: append to the address object's receive request queue. */
    InsertTailList(&AddrFile->ReceiveQueue, &ReceiveRequest->ListEntry);

    UnlockObject(AddrFile, OldIrql);
    return STATUS_PENDING;
}
前面我们看到,每当满足一个接收请求后,会调用它的完成函数,以继续发出接收请求。我们看看那个完成函数是不是这样做的。
/*
 * DGReceiveComplete - completion routine of a queued datagram receive request.
 *
 * `Context` is the DATAGRAM_RECEIVE_REQUEST itself. Forwards the status and
 * byte count to the completion routine stored in the request, then frees the
 * request.
 */
VOID DGReceiveComplete(PVOID Context, NTSTATUS Status, ULONG Count)
{
    PDATAGRAM_RECEIVE_REQUEST Request = (PDATAGRAM_RECEIVE_REQUEST)Context;

    /* Notify the original requester first ... */
    Request->UserComplete(Request->UserContext, Status, Count);

    /* ... then release the bookkeeping structure. */
    ExFreePoolWithTag(Request, DATAGRAM_RECV_TAG);
}
/*
 * Records the outcome of a data request in its IRP and finishes the IRP.
 *
 * Context  The IRP the request belongs to.
 * Status   Final status stored in the IRP's I/O status block.
 * Count    Byte count stored in the IRP's I/O status block.
 */
VOID DispDataRequestComplete(
    PVOID Context,
    NTSTATUS Status,
    ULONG Count)
{
    PIRP FinishedIrp = (PIRP)Context;

    FinishedIrp->IoStatus.Information = Count;
    FinishedIrp->IoStatus.Status = Status;

    // IRPFinish completes the IRP unless Status is STATUS_PENDING.
    (void)IRPFinish(FinishedIrp, Status);
}
/*
 * Stores Status in the IRP and either marks the IRP pending (when Status is
 * STATUS_PENDING) or clears its cancel routine and completes it.
 *
 * Returns Status unchanged.
 */
NTSTATUS IRPFinish( PIRP Irp, NTSTATUS Status )
{
    KIRQL CancelIrql;

    Irp->IoStatus.Status = Status;

    if( Status != STATUS_PENDING ) {
        // Remove the cancel routine under the cancel spin lock before completing.
        IoAcquireCancelSpinLock(&CancelIrql);
        (void)IoSetCancelRoutine( Irp, NULL );
        IoReleaseCancelSpinLock(CancelIrql);

        // Key step: complete the IRP.
        IoCompleteRequest( Irp, IO_NETWORK_INCREMENT );
        return Status;
    }

    IoMarkIrpPending( Irp );
    return Status;
}
在IoCompleteRequest中会调用最初设置的完成例程,即PacketSocketRecvComplete函数
/*
 * I/O completion routine run each time a UDP receive request issued to the
 * transport completes.
 *
 * DeviceObject  Not used by this routine.
 * Irp           The completed receive IRP; IoStatus.Information is the
 *               number of bytes placed in FCB->Recv.Window.
 * Context       The socket's AFD_FCB.
 *
 * Copies the datagram out of the receive window into a newly allocated
 * AFD_STORED_DATAGRAM, queues it on FCB->DatagramList, satisfies any receive
 * IRPs waiting in FCB->PendingIrpList[FUNCTION_RECV], updates the poll state
 * and, if the completed IRP succeeded, issues the next receive request to
 * the transport.
 *
 * Returns STATUS_FILE_CLOSED if the socket state lock cannot be acquired,
 * STATUS_NO_MEMORY if the datagram cannot be stored, STATUS_SUCCESS otherwise.
 */
NTSTATUS
PacketSocketRecvComplete(PDEVICE_OBJECT DeviceObject,PIRP Irp,PVOID Context )
{
    NTSTATUS Status = STATUS_SUCCESS;
    PAFD_FCB FCB = Context;
    PIRP NextIrp;
    PIO_STACK_LOCATION NextIrpSp;
    PLIST_ENTRY ListEntry;
    PAFD_RECV_INFO RecvReq;
    PAFD_STORED_DATAGRAM DatagramRecv;
    UINT DGSize = Irp->IoStatus.Information + sizeof( AFD_STORED_DATAGRAM );
    // NOTE(review): not referenced in the visible code; presumably used by the elided closed-socket branch.
    PLIST_ENTRY NextIrpEntry, DatagramRecvEntry;

    if( !SocketAcquireStateLock( FCB ) ) return STATUS_FILE_CLOSED;

    FCB->ReceiveIrp.InFlightRequest = NULL; // no receive IRP is in flight any more

    // The handling of a closed socket is elided in this listing.
    if( FCB->State == SOCKET_STATE_CLOSED ) 。。。

    // Key step: allocate an AFD_STORED_DATAGRAM, the element type of the socket's datagram queue.
    DatagramRecv = ExAllocatePool( NonPagedPool, DGSize );
    if( !DatagramRecv )
    {
        // Without storage the datagram cannot be queued; do not dereference NULL.
        SocketStateUnlock( FCB );
        return STATUS_NO_MEMORY;
    }

    DatagramRecv->Len = Irp->IoStatus.Information;
    // Every incoming datagram lands in the window first; the window is only a staging buffer.
    RtlCopyMemory( DatagramRecv->Buffer, FCB->Recv.Window, DatagramRecv->Len );
    // NOTE(review): the result of TaCopyTransportAddress is not checked here - confirm whether it can fail.
    DatagramRecv->Address = TaCopyTransportAddress( FCB->AddressFrom->RemoteAddress );

    // Key step: queue the datagram so that later receive requests from the
    // application can be satisfied directly from this list.
    InsertTailList( &FCB->DatagramList, &DatagramRecv->ListEntry );

    // While both a stored datagram and a waiting receive IRP exist, pair them up.
    while( !IsListEmpty( &FCB->DatagramList ) &&
           !IsListEmpty( &FCB->PendingIrpList[FUNCTION_RECV] ) )
    {
        ListEntry = RemoveHeadList( &FCB->DatagramList );
        DatagramRecv = CONTAINING_RECORD( ListEntry, AFD_STORED_DATAGRAM, ListEntry );

        ListEntry = RemoveHeadList( &FCB->PendingIrpList[FUNCTION_RECV] );
        NextIrp = CONTAINING_RECORD( ListEntry, IRP, Tail.Overlay.ListEntry );
        NextIrpSp = IoGetCurrentIrpStackLocation( NextIrp );
        RecvReq = NextIrpSp->Parameters.DeviceIoControl.Type3InputBuffer;

        if( DatagramRecv->Len > RecvReq->BufferArray[0].len &&
            !(RecvReq->TdiFlags & TDI_RECEIVE_PARTIAL) )
        {
            // Buffer too small and partial receive not allowed: put the
            // datagram back and fail this IRP with the required length.
            InsertHeadList( &FCB->DatagramList, &DatagramRecv->ListEntry );
            Status = NextIrp->IoStatus.Status = STATUS_BUFFER_TOO_SMALL;
            NextIrp->IoStatus.Information = DatagramRecv->Len;
        }
        else
        {
            Status = SatisfyPacketRecvRequest( FCB, NextIrp, DatagramRecv,
                                               (PUINT)&NextIrp->IoStatus.Information );
        }

        // Common completion path for both outcomes.
        UnlockBuffers( RecvReq->BufferArray, RecvReq->BufferCount, TRUE );
        if( NextIrp->MdlAddress )
            UnlockRequest( NextIrp, NextIrpSp );
        // Clear the cancel routine before completing, in both branches.
        (void)IoSetCancelRoutine( NextIrp, NULL );
        IoCompleteRequest( NextIrp, IO_NETWORK_INCREMENT );
    }

    if( !IsListEmpty( &FCB->DatagramList ) )
    {
        // Data is still queued: signal readability to pollers.
        FCB->PollState |= AFD_EVENT_RECEIVE;
        FCB->PollStatus[FD_READ_BIT] = STATUS_SUCCESS;
        PollReeval( FCB->DeviceExt, FCB->FileObject );
    }
    else
    {
        FCB->PollState &= ~AFD_EVENT_RECEIVE;
    }

    // Key step: immediately issue the next receive request to the transport,
    // reusing the receive window as the staging buffer.
    if( NT_SUCCESS(Irp->IoStatus.Status) )
    {
        Status = TdiReceiveDatagram
            ( &FCB->ReceiveIrp.InFlightRequest,
              FCB->AddressFile.Object,
              0,
              FCB->Recv.Window,
              FCB->Recv.Size,
              FCB->AddressFrom,
              &FCB->ReceiveIrp.Iosb,
              PacketSocketRecvComplete,
              FCB );
    }

    SocketStateUnlock( FCB );
    return STATUS_SUCCESS;
}
总结一下协议驱动、小端口驱动之间的交互步骤:
1、 系统启动时加载ndis.sys模块,建立起ndis基础运行环境
2、 安装加载各种协议驱动,在DriverEntry中注册协议特征,即各种回调函数
3、 安装网卡,加载小端口驱动,在DriverEntry中注册小端口特征,即各种回调函数
4、 进入小端口驱动的AddDevice,ndis自动为我们创建一个小端口设备对象,加入堆栈
5、 系统为这个网卡分配端口、中断号等资源
6、 启动网卡设备,进入小端口驱动注册的初始化例程
7、 在小端口驱动的初始化例程中:初始化硬件寄存器、注册中断向量、分配自定义设备扩展等典型工作
8、 网卡启动初始化完毕后,ndis框架调用各协议驱动提供的绑定回调函数,通知绑定
9、 进入各协议驱动提供的绑定回调函数,我们要调用NdisOpenAdapter打开网卡进行绑定
10、 若NdisOpenAdapter返回NDIS_STATUS_PENDING,则稍后由Ndis框架调用绑定完成回调函数;若它同步返回,则由我们自己手动调用该函数
11、 网卡收到一个数据包,触发中断,进入ndis托管的isr
12、 托管Isr进入我们注册的isr和后半部
13、 我们的isr调用NdisMEthIndicateReceive这个宏,调用各绑定协议提供的接收回调函数,向上提交
14、 进入各个协议的接收回调函数(ReceivePacketHandler和ReceiveHandler)
15、 后面怎么处理收到的包自行决定