Python抽取文件_相同时间段间隔_取特定图片

功能说明

前置条件

代码示例

# -*- coding: utf-8 -*-

import os 
import datetime 
import pandas as pd
import numpy as np
import math


def transform_time(file_dir):
    """Print the local time encoded in the name of each ``pcd`` entry of a directory.

    The last 16 characters of the file stem are read as an integer number of
    microseconds since the epoch and converted to local time.

    Args:
        file_dir: Directory whose entries are listed with ``os.listdir``.

    Returns:
        None. For every entry whose name ends with ``"pcd"`` one line is
        printed: the entry's position in the directory listing (counting all
        entries, not only pcd files), the formatted local time and the raw
        16-character timestamp.

    Raises:
        ValueError: If the last 16 characters of a matching stem are not an
            integer.
    """
    for position, entry in enumerate(os.listdir(file_dir)):
        if not entry.endswith("pcd"):
            continue
        stem, _extension = os.path.splitext(entry)
        micro_text = stem[-16:]
        # Microseconds -> seconds so that fromtimestamp() can interpret the value.
        local_time = datetime.datetime.fromtimestamp(int(micro_text) / 1000000)
        # Rendered like 2016-05-05 20:28:54:123456 (microseconds after the last colon).
        print(position, local_time.strftime("%Y-%m-%d %H:%M:%S:%f"), micro_text)


if __name__ == "__main__":
    # Script entry: list the jpg files of a directory, bucket them into
    # 10-second windows, and for every individual second print the first frame
    # plus (when the second holds two or more frames) the middle frame.
    file_dir = r"/home/test/data/"
    data_orig = []
    for file_nm in sorted(os.listdir(file_dir)):
        if not file_nm.endswith("jpg"):
            continue
        # The stem ends with a 16-character microsecond timestamp; dropping its
        # last 6 characters leaves the whole-second part.
        pcd_timestamp = os.path.splitext(file_nm)[0][-16:]
        data_orig.append((file_nm, int(pcd_timestamp), int(pcd_timestamp[:-6])))
    orig_df = pd.DataFrame(data_orig, columns=['file', 'pcd_micro', 'pcd_second'])
    # Convert epoch seconds to datetimes so pd.Grouper can bucket by time.
    orig_df['pcd_second'] = pd.to_datetime(orig_df['pcd_second'], unit='s')
    datt = orig_df.groupby(pd.Grouper(key='pcd_second', freq="10s"))
    for _window_start, or_data in datt:
        sig_list = []
        # Inside each 10-second window, look at every second separately.
        for _second, sub_group_data in or_data.groupby('pcd_second'):
            sub_group_data = sub_group_data.reset_index()
            frame_count = len(sub_group_data)
            if frame_count >= 2:
                # Keep the first and the middle frame of this second.
                sig_list.append(sub_group_data.iloc[[0]])
                sig_list.append(sub_group_data.iloc[[frame_count // 2]])
            else:
                sig_list.append(sub_group_data)
        for data in sig_list:
            # Each entry is a one-row frame; print the selected file name.
            print(data['file'].iloc[0])

传统编程

##01.构建字典 -分组
    字典映射-一个key映射多个value,通过将value组合成list或者set的方式,进行分组
  想保持元素的插入顺序就应该使用列表,如果想去掉重复元素就使用集合
 可以使用collections模块中的defaultdict来构造这样的字典。defaultdict的一个特点是:首次访问某个key时,会自动用默认工厂(例如list)初始化该key对应的值
##001.利用传统的dict 
 # Group values by key with a plain dict: the list for a key has to be
 # created by hand the first time that key is seen.
 # `pairs` is not defined in this snippet; it is unpacked as (key, value) items.
 d = {}
 for key, value in pairs:
     if key not in d:
         d[key] = []
     d[key].append(value)
 	
###002.利用defaultdict的代码如下:
 from collections import defaultdict 
 # Same grouping with defaultdict(list): a missing key gets an empty list
 # automatically, so no membership check is needed before appending.
 # `pairs` is not defined in this snippet; it is unpacked as (key, value) items.
 d = defaultdict(list)
 for key, value in pairs:
     d[key].append(value)
	 
##02.对各个分组--进行操作

# 抽取帧的下标:用 np.linspace 创建等差数列;dtype=int 时结果向下取整(不是向上取整)
choose_num_index = np.linspace(0, file_k-1, choose_total_num, dtype=int)
numpy.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0)
   start:返回样本数据开始点
   stop:返回样本数据结束点
   num:生成的样本数据量,默认为50
   endpoint:True则包含stop;False则不包含stop
   retstep:If True, return (samples, step), where step is the spacing between samples.(即如果为True则结果会给出数据间隔)
   dtype:输出数组类型
   axis:0(默认)或-1
# numpy.logspace函数生成等比数列

## range函数 range(start, stop [,step])   可以使用3个参数:起始值,终止值,间隔 
    左闭右开区间 [start, stop)
    Python2 中 range() 生成的是列表,本文基于Python3,生成的是可迭代对象

类似的代码-进行实际应用

# Bucket the image files by the key that get_sec_time() derives from each name.
# get_sec_time and image_list are defined elsewhere; presumably the key is the
# timestamp truncated to seconds (e.g. for 1659173359.123156.jpg) - confirm.
image_group = {}
for image_file in image_list:
    time_sec = get_sec_time(image_file)
    # setdefault creates the bucket the first time a key shows up.
    image_group.setdefault(time_sec, []).append(image_file)

from collections import defaultdict

# Same grouping with defaultdict(list): missing keys start as an empty list.
# get_sec_time and img_list are defined elsewhere.
d = defaultdict(list)
for img_file in img_list:
    # Derive the key from the file being iterated (the loop variable), not
    # from a name left over from another loop.
    time_sec = get_sec_time(img_file)
    d[time_sec].append(img_file)

示例代码

# -*- coding: utf-8 -*-

import os
from glob import glob

def image_sample(image_path,interval_ps = 2):
    """Pick up to ``interval_ps`` jpg frames for every second found in a directory.

    File names are expected to look like ``1659173359.123156.jpg``: the text
    before the first dot is the second, and the whole stem parses as a float
    timestamp.

    Args:
        image_path: Directory searched (non-recursively) for ``*.jpg`` files.
        interval_ps: Maximum number of frames to keep per second.

    Returns:
        List of selected file paths, ordered by timestamp. A second holding
        ``interval_ps`` frames or fewer contributes all of its frames; a
        fuller second contributes exactly ``interval_ps`` frames taken at a
        regular stride starting from its first frame.

    Raises:
        ValueError: If a file stem cannot be parsed as a float.
    """
    # glob only matches files directly inside image_path, not in sub-folders.
    image_list = glob(os.path.join(image_path, '*.jpg'))
    # Order by the numeric timestamp in the stem (name without ".jpg").
    image_list.sort(key=lambda f: float(os.path.splitext(os.path.basename(f))[0]))

    # Group paths by whole second; dict insertion order keeps the time order.
    image_group = {}
    for image_file in image_list:
        time_sec = os.path.basename(image_file).split('.')[0]
        image_group.setdefault(time_sec, []).append(image_file)

    choose_img = []
    for image_group_list in image_group.values():
        group_len = len(image_group_list)
        if group_len <= interval_ps:
            choose_img.extend(image_group_list)
            continue
        offset = round(group_len / interval_ps)
        if interval_ps > 1:
            # round() may round up to a stride that runs off the end before
            # interval_ps frames are collected (e.g. 6 frames, 4 wanted ->
            # stride 2 -> only 3 picks). Cap it at the largest stride that
            # still fits interval_ps picks inside the group.
            offset = min(offset, (group_len - 1) // (interval_ps - 1))
        choose_img.extend(image_group_list[0:group_len:offset][:interval_ps])
    return choose_img
	
if __name__ == "__main__":
    # Demo run: sample the "/dta" directory with a per-second limit of 3
    # and print the selected paths.
    choose_file, extra_interval = "/dta", 3
    time_dat = image_sample(choose_file, extra_interval)
    print(time_dat)

示例代码

##构建不同的数据结构,注意解耦合和扩展性
# Build one dict record per directory entry and remember the distinct seconds
# in first-seen order. get_file_second and image_file_path are defined elsewhere.
frame_files = []
seconds = []
for filename in os.listdir(image_file_path):
    file_second = get_file_second(filename)
    if file_second not in seconds:
        seconds.append(file_second)
    second_file = {
        'name': filename,
        'second': file_second,
        'source': os.path.join(image_file_path, filename),
        # Target path: same file name under the path with 'source' replaced by 'dest'.
        'target': os.path.join(image_file_path.replace('source', 'dest'), filename),
    }
    frame_files.append(second_file)
interval_ps = 2
choose_img = []
for second in seconds:
    second_files = [image for image in frame_files if image['second'] == second]
    image_sec_len = len(second_files)
    # Seconds holding fewer than interval_ps frames are skipped here.
    if image_sec_len >= interval_ps:
        offset = round(image_sec_len / interval_ps)
        # NOTE(review): the sketch stops after computing the stride; nothing is
        # appended to choose_img yet.

参考

详解pd.Grouper()以及时间分组groupby() https://blog.csdn.net/weixin_46713695/article/details/125416343
posted @ 2022-08-26 16:12  辰令  阅读(117)  评论(0)  编辑  收藏  举报