如何寻找bug(6)

继续在第117~170行之间添加DEBUG_TEXT,出错范围缩小到第133~165行。

133    DEBUG_TEXT(DFDB_ROSCORE, 8, "FragmentRequest::execute calling dc->getFragment for L1Id "<< m_level1Id<< std::endl);//已被打印
134    for (std::vector<DataChannel*>::iterator dc=chanStartIter; dc!=chanEndIter; dc++) {
135       DEBUG_TEXT(DFDB_ROSCORE, 20, "FragmentRequest::execute calling dc->getFragment for L1Id "<< m_level1Id<< std::endl);
136       EventFragment* subFragment = ((*dc)->getFragment(m_ticket[index])) ;
137       if (subFragment != 0) {
138          partsReceived++;
139          DEBUG_TEXT(DFDB_ROSCORE, 20, "FragmentRequest::execute calling builder->appendFragment for L1Id "<< m_level1Id<< std::endl    );
140          m_builder->appendFragment(m_eventFragment,subFragment);
141          TS_RECORD(TS_H1,2350);
142 
143          fragmentOk=subFragment->fragmentReady();
144 
145          s_mutex->lock() ;
146          delete subFragment;
147          s_mutex->unlock() ;
148       }
149       else {
150          DEBUG_TEXT(DFDB_ROSCORE, 8, "FragmentRequest for L1Id "<< m_level1Id << " missing data aborting\n");
151          fragmentOk=false;
152          TS_RECORD(TS_H1,2360);
153       }
154 
155       index++;
156    }
157 
158 
159    bool retired=false;
160    if (!fragmentOk) {
161      retired=checkAge(s_maxAge);
162    }
163 
164 
165    DEBUG_TEXT(DFDB_ROSCORE, 8, "FragmentRequest::execute L1Id:"<< m_level1Id<< ",fragmentOk="<< fragmentOk<<",retired="<<retired<<",partsReceived="<<partsReceived<< std::endl);//未被打印

先在log文件中找到requestOk:-1,确定是L1Id为196635的请求出现问题。

[lhaaso@cmm03node01 part_dk_ef]$ grep "FragmentRequest::execute calling dc->getFragment for L1Id 196635" ROS-Eth-00_cmm03node01_1487253328.out
Debug(13,140405022635776): FragmentRequest::execute calling dc->getFragment for L1Id 196635
[lhaaso@cmm03node01 part_dk_ef]$ grep "Debug(13,140405022635776): FragmentRequest::execute L1Id:196635" ROS-Eth-00_cmm03node01_1487253328.out
[lhaaso@cmm03node01 part_dk_ef]$ 

继续加打印缩小范围,定位到是在第136行出现的问题:

136       EventFragment* subFragment = ((*dc)->getFragment(m_ticket[index])) ;

PCMemoryDataChannel.cpp里PCMemoryDataChannel::getFragment()第200行的DEBUG_TEXT已被打印,因此猜测错误出现在其后的try块里。继续缩小范围,定位到第203行出现问题。
198     if((intptr_t)ed == EventInputManager::EIM_MAYCOME || (intptr_t)ed == EventInputManager::EIM_NEVERTOCOME)
199     {
200     DEBUG_TEXT(DFDB_ROSFM, 8, "EIM_MAYCOME || EIM_NEVERTOCOME " << ticket << std::endl);
201       try
202       {
203   fragment = new ROBFragment(m_memoryPool, ticket, m_sourceIdentifier, 0); //create an empty ROB fragment //出现问题的代码
204   Buffer *mem_buffer = fragment->buffer();                                 //get the Buffer of the ROB fragment
205   Buffer::page_iterator mem_page_i = mem_buffer->begin();
206   MemoryPage *mem_page = const_cast<MemoryPage *>(*mem_page_i);            //get the memory page of the buffer
207   mem_page->lock();
208 
209   evDesc_t *ed = m_eventInputManager->getEventDescriptor(mem_page);        //get a pointer to the event descriptor
210   ed->L1id = ticket;                                                       //set the L1ID
211   m_eventInputManager->createEvent(ed);                                    //Insert the event into the Event Input Manager
212 
213   if ((intptr_t)ed == EventInputManager::EIM_MAYCOME)
214   {
215     m_statistics->fragmentsMissed++;
216     fragment->setStatus(EventFragment::STATUS_MAYCOME);
217     DEBUG_TEXT(DFDB_ROSFM, 10, "PCMemoryDataChannel::getFragment: Fragment for L1ID " << ticket << " has not yet arrived");
218   }
219 
220   if ((intptr_t)ed == EventInputManager::EIM_NEVERTOCOME)
221   {
222     m_statistics->fragmentsLost++;
223     fragment->setStatus(EventFragment::STATUS_LOST);
224     DEBUG_TEXT(DFDB_ROSFM, 10, "PCMemoryDataChannel::getFragment: Fragment for L1ID " << ticket << " does not exist");
225           CREATE_ROS_EXCEPTION(ex1, CoreException, PCMEMCHAN_LOST, "\n L1ID = " << ticket << ", ROL physical addr = " << physicalAddress());
226           ers::warning(ex1);
227   }

 

查看ROBFragment.cpp中ROBFragment的构造函数如下:

198 /********************************************************************************************/
199 ROBFragment::ROBFragment(MemoryPool* mempool, u_int level1Id, u_int sourceId, u_int runNumber)
200 /********************************************************************************************/
201 {
202   DEBUG_TEXT(DFDB_ROSEF, 8 , "Lost event " << level1Id << " begin to created"); //已被打印
203   // This constructor is for the (hopefully) rare case that a ROD fragment 
204   // does not get delivered by the ROL and the FragmentManager has to 
205   // return an empty ROB fragment
206 
207   m_buffer = new Buffer(mempool); //出现问题代码行
208 
209   // Build the ROB header
210   DEBUG_TEXT(DFDB_ROSEF, 8, "calling initialiseHeader "<<level1Id << std::endl); //未被打印
211   initialiseHeader(sourceId, STATUS_TIMEOUT);
212 
213   DEBUG_TEXT(DFDB_ROSEF, 8, "ROBFragment::ROBFragment(lost): s_formatVersionNumber for ROD header is " << s_formatVersionNumber <<     " " << level1Id << std::endl);
214   // Build the ROD header
215   m_rodheader = new(m_buffer) RODFragment::RODHeader;
216   DEBUG_TEXT(DFDB_ROSEF, 8, "m_rodheader is at " << m_rodheader << " " << level1Id << std::endl);
217   m_rodheader->startOfHeaderMarker = s_rodMarker;
218   m_rodheader->headerSize          = sizeof(RODFragment::RODHeader) / sizeof (u_int);
219   m_rodheader->formatVersionNumber = s_rodformatVersionNumber;
220   //The source ID of the ROD header should not be identical to that of the ROB header. As we don't know
221   //it (without additional tricks in the FM) I duplicate it anyway. FIXME
222   m_rodheader->sourceIdentifier    = sourceId & 0xffffff;
223   m_rodheader->level1Id            = level1Id;
224   m_rodheader->bunchCrossingId     = 0;
225   m_rodheader->level1TriggerType   = 0;
226   m_rodheader->detectorEventType   = 0;
227   m_rodheader->runNumber           = runNumber;
228 
229   // Build the ROD body (just one status word)
230   m_rodbody = new(m_buffer) u_int[1];
231   *m_rodbody = STATUS_TIMEOUT;    // Error status
232   DEBUG_TEXT(DFDB_ROSEF, 8, "m_rodbody is at " << m_rodbody << " " << level1Id << std::endl);
233 
234   // Build the ROD trailer
235   m_rodtrailer = new(m_buffer) RODFragment::RODTrailer;
236   DEBUG_TEXT(DFDB_ROSEF, 8, "m_rodtrailer is at " << m_rodtrailer << " " << level1Id << std::endl);
237   m_rodtrailer->numberOfStatusElements = 1;
238   m_rodtrailer->numberOfDataElements   = 0;
239   m_rodtrailer->statusBlockPosition    = 0;
240 
241   // Generic ROB header   
242   int rodsize = RODFragment::s_rodheaderSize + 1 + RODFragment::s_rodtrailerSize;
243   m_header->generic.totalFragmentsize  = s_robheaderSize + rodsize;
244 
245   //No ROB trailer. crc_flag is 0
246 
247   m_rodFragmentExists = 1;
248   DEBUG_TEXT(DFDB_ROSEF, 8 , "Lost event " << level1Id << " created");
249 }

 

查看../../ROSBufferManagement/src/Buffer.cpp 

121 Buffer::Buffer(MemoryPool *memoryPool)
122   : m_memoryPool(memoryPool),
123     m_size(0),
124     m_lastMemoryPage(m_memoryPool->getPage()),
125     m_pageSize(m_memoryPool->pageSize()),
126     m_numberOfPages(1),
127     m_current(0),
128     m_reserved(0)
129 {
130   m_pages[0]=m_lastMemoryPage;
131 }

 

../../ROSMemoryPool/ROSMemoryPool/MemoryPool.h

121   inline MemoryPage * MemoryPool::getPage()
122   {
123     if (m_freeIndex >= m_noPages)
124       throw MemoryPoolException(MemoryPoolException::NOPAGESAVAILABLE);
125 
126     MemoryPage *rc = (*m_pageVector)[m_freeIndex];
127     (*m_pageVector)[m_freeIndex] = 0;
128     m_freeIndex++;
129     return rc;
130   }

 

出现问题的原因:

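FragmentRequest::execute()执行时,getFragment()没有找到对应L1id的ROBFragment,于是要构造一个空的ROBFragment;ROBFragment的构造函数会new一个Buffer,而Buffer的构造函数调用MemoryPool::getPage(),最终在getPage()处报错。从getPage()的代码看,它唯一显式的报错路径是m_freeIndex >= m_noPages时抛出MemoryPoolException(NOPAGESAVAILABLE),因此很可能是内存池已经没有空闲页(待确认)。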

(批注:为什么会出现找不到ROB的情况呢?按设计,ROS只有在数据到齐的情况下才会向L2SV发送消息,消息再传递到SFI,之后SFI才会向ROS请求对应L1id的数据,所以这个时候缺少ROB在逻辑上是不应该发生的。因此怀疑检查数据完整性的逻辑有问题。)

posted @ 2017-02-16 22:48  小荷才楼尖尖角  Views(267)  Comments(0)  Edit  收藏  举报