import sys
import os
import time
import json
from datetime import datetime
from threading import Thread, Timer, Lock
import os
import pyautogui
from pynput import mouse, keyboard
from loguru import logger
from PyQt5.QtWidgets import QApplication, QWidget
from PyQt5.QtCore import Qt, QRect
from PyQt5.QtGui import QPainter, QPen, QColor, QGuiApplication, QRegion
import os
import signal
# Path of the JSON file that save_event_data() rewrites with the full event list.
EVENT_LOG_FILE = "event_log.json"
# Import-time side effect: make sure the directory take_screenshot() saves into exists.
os.makedirs("screenshots", exist_ok=True)
# Every recorded event (one dict each), in the order it was appended.
event_data = []
# Modifier state maintained by handle_modifiers() from the key press/release callbacks.
# In this file shift_pressed is tracked and logged but never consulted.
ctrl_pressed = False
shift_pressed = False
alt_pressed = False
# Text input buffer
input_buffer = []
# time.time() of the most recently buffered character, or None before the first one.
last_char_time = None
# Gap in seconds between characters after which handle_normal_char() flushes the buffer.
buffer_timeout = 0.5
# Mouse state
# mouse_pressed is True between a press callback and the matching release callback.
mouse_pressed = False
# {"x", "y", "time"} samples appended by on_move() while a button is held.
drag_path = []
# (x, y) of the most recent button press.
drag_start = None
# drag_button and press_time are written by on_click() but not read anywhere in this file.
drag_button = None
press_time = None
# Seconds: maximum gap between two releases for a double click, and the delay
# before a pending single click is finalized.
double_click_threshold = 0.3
# Press-to-release distance above which a release is recorded as a drag
# (same units as the coordinates pynput reports - presumably pixels).
click_distance_threshold = 5
# Time, position and button of the previous click candidate.
# last_click_pos is stored but not read anywhere in this file.
last_click_time = None
last_click_pos = None
last_click_button = None
# A single click waiting out the double-click window, and the Timer that finalizes it.
pending_click_event = None
pending_click_timer = None
# Plain (non-reentrant) lock guarding pending_click_event / pending_click_timer.
lock = Lock()
# Populated at startup: app and red_border_window in the __main__ block,
# the two listeners in start_listeners().
app = None
red_border_window = None
mouse_listener = None
keyboard_listener = None
# Import-time side effect: log DEBUG and above to debug.log with the rotation and
# compression settings passed here.
logger.add("debug.log", format="{time} {level} {message}", level="DEBUG", rotation="1 MB", compression="zip")
# Serializes writers of EVENT_LOG_FILE. save_event_data() is reached from the
# keyboard listener callbacks, the mouse listener callbacks and the single-click
# Timer, which do not run on the same thread.
_save_lock = Lock()


def save_event_data():
    """Rewrite EVENT_LOG_FILE with the complete ``event_data`` list as JSON.

    The file is opened in ``'w'`` mode, so it is truncated and rewritten in
    full on every call. Writers are serialized with a private lock so two
    overlapping calls cannot interleave their output and leave invalid JSON.
    """
    with _save_lock:
        with open(EVENT_LOG_FILE, 'w', encoding='utf-8') as f:
            json.dump(event_data, f, ensure_ascii=False, indent=4)
    logger.debug("Event data saved to file.")
def take_screenshot():
    """Capture the screen to ``screenshots/<timestamp>.png`` and return the path.

    The red border overlay, when present, is hidden for the capture so it
    does not appear in the image, and is shown again afterwards even if the
    capture or the save raises.

    Returns:
        The relative path of the saved PNG file.
    """
    logger.debug("Taking screenshot...")
    overlay = red_border_window
    # NOTE(review): this runs on listener/Timer threads, while the overlay is a
    # Qt widget created in the __main__ block - confirm that hiding/showing it
    # from a non-GUI thread is acceptable on the target platforms.
    if overlay:
        overlay.hide()
        # Short pause before capturing - presumably to let the overlay disappear.
        time.sleep(0.05)
    try:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        filename = f"screenshots/{timestamp}.png"
        screenshot = pyautogui.screenshot()
        screenshot.save(filename)
    finally:
        # Always restore the overlay; previously a failed capture left it hidden.
        if overlay:
            overlay.show()
    logger.info(f"Screenshot saved as {filename}")
    return filename
def flush_input_buffer():
    """Record the buffered characters as one ``text_input`` event and clear the buffer.

    Does nothing when the buffer is empty. Otherwise takes a screenshot,
    appends the event to ``event_data``, resets ``input_buffer`` to a new
    empty list and saves the event log.
    """
    global input_buffer
    if not input_buffer:
        return

    typed_text = "".join(input_buffer)
    logger.debug(f"Flushing input buffer: {typed_text}")
    shot_path = take_screenshot()
    event_data.append({
        "type": "text_input",
        "time": datetime.now().isoformat(),
        "text": typed_text,
        "screenshot": shot_path,
    })
    input_buffer = []
    save_event_data()
def handle_modifiers(key, pressed):
    """Update the ctrl/shift/alt flags for *key* and log the resulting state.

    Args:
        key: The pynput key passed to the press/release callback.
        pressed: True for a press, False for a release.
    """
    global ctrl_pressed, shift_pressed, alt_pressed
    special = keyboard.Key
    is_ctrl = key in (special.ctrl_l, special.ctrl_r)
    if is_ctrl:
        ctrl_pressed = pressed
    elif key in (special.shift, special.shift_r):
        shift_pressed = pressed
    elif key in (special.alt_l, special.alt_r):
        alt_pressed = pressed
    # Logged on every call, including calls where no flag changed.
    logger.debug(f"Modifier changed: ctrl={ctrl_pressed}, shift={shift_pressed}, alt={alt_pressed}")
def handle_combination(key_char):
    """Record a ctrl+<letter> shortcut if *key_char* completes one.

    Only the letters c, v, x, s and a are treated as shortcuts, and only
    while ``ctrl_pressed`` is set.

    Args:
        key_char: The printable character of the pressed key.

    Returns:
        True if a ``key_shortcut`` event was recorded, False otherwise.
    """
    if not ctrl_pressed:
        return False
    letter = key_char.lower()
    if letter not in ("c", "v", "x", "s", "a"):
        return False

    # The screenshot is still taken when the action ends; pressing the
    # combination ends the action immediately.
    shot_path = take_screenshot()
    shortcut_event = {
        "type": "key_shortcut",
        "time": datetime.now().isoformat(),
        "shortcut": f"ctrl+{letter}",
        "screenshot": shot_path,
    }
    event_data.append(shortcut_event)
    save_event_data()
    logger.info(f"Detected shortcut: ctrl+{letter}")
    return True
def handle_normal_char(key_char):
    """Append *key_char* to the text buffer, flushing the buffer first if it went idle.

    The buffer is flushed when more than ``buffer_timeout`` seconds have
    passed since the previously buffered character.

    Args:
        key_char: The printable character to buffer.
    """
    global last_char_time
    previous = last_char_time
    if previous is not None:
        idle_seconds = time.time() - previous
        if idle_seconds > buffer_timeout:
            flush_input_buffer()
    # Looked up after the possible flush, which rebinds input_buffer to a new list.
    input_buffer.append(key_char)
    last_char_time = time.time()
    logger.debug(f"Buffered char: {key_char}")
# Special keys that are recorded under a fixed name.
_NAMED_SPECIAL_KEYS = (
    (keyboard.Key.enter, "enter"),
    (keyboard.Key.esc, "esc"),
    (keyboard.Key.tab, "tab"),
    (keyboard.Key.up, "up"),
    (keyboard.Key.down, "down"),
    (keyboard.Key.left, "left"),
    (keyboard.Key.right, "right"),
)

# Function keys that are recorded under ``str(key)``.
_FUNCTION_KEYS = (
    keyboard.Key.f1, keyboard.Key.f2, keyboard.Key.f3, keyboard.Key.f4,
    keyboard.Key.f5, keyboard.Key.f6, keyboard.Key.f7, keyboard.Key.f8,
    keyboard.Key.f9, keyboard.Key.f10, keyboard.Key.f11, keyboard.Key.f12,
)


def _special_key_name(key):
    """Return the name *key* is recorded under, or None if it is not recorded."""
    for candidate, name in _NAMED_SPECIAL_KEYS:
        if key == candidate:
            return name
    if key in _FUNCTION_KEYS:
        return str(key)
    return None


def handle_special_key(key):
    """Record a ``key_special`` event for enter, esc, tab, the arrows and F1-F12.

    Any other key is ignored. The key is resolved before the screenshot is
    taken, so ignored keys no longer leave an unreferenced screenshot file
    on disk.

    Args:
        key: The pynput key passed to the press callback.
    """
    special_key_name = _special_key_name(key)
    if special_key_name is None:
        return

    # A special key ends the current action immediately, so capture right away.
    screenshot_file = take_screenshot()
    event_data.append({
        "type": "key_special",
        "time": datetime.now().isoformat(),
        "key": special_key_name,
        "screenshot": screenshot_file
    })
    save_event_data()
    logger.info(f"Recorded special key: {special_key_name}")
def check_exit_condition():
    """Shut the recorder down and terminate the process.

    Despite its name this function performs no check: the caller decides
    that ctrl+alt+esc was pressed. It flushes any buffered text input, stops
    both listeners, closes the overlay window, asks the Qt application to
    quit and then calls ``os._exit(0)``, so it never returns.
    """
    logger.info("Detected ctrl+alt+esc, exiting...")
    # Buffered characters are otherwise only flushed when a later keystroke
    # arrives, so text typed just before exiting would be lost.
    try:
        flush_input_buffer()
    except Exception:
        # Best effort only: a failed flush must not prevent the shutdown.
        logger.exception("Failed to flush pending text input before exit.")
    if mouse_listener:
        mouse_listener.stop()
    if keyboard_listener:
        keyboard_listener.stop()
    if red_border_window is not None:
        red_border_window.close()
    if app is not None:
        app.quit()
    # Force every thread and the process itself to terminate.
    os._exit(0)
def button_name_from_pynput(button):
    """Map a pynput mouse button to "left", "right" or "middle".

    Args:
        button: The button value passed to the mouse click callback.

    Returns:
        The short name for the three known buttons, otherwise ``str(button)``.
    """
    known_buttons = (
        (mouse.Button.left, "left"),
        (mouse.Button.right, "right"),
        (mouse.Button.middle, "middle"),
    )
    for candidate, label in known_buttons:
        if button == candidate:
            return label
    return str(button)
def record_event(event_info):
    """Append *event_info* to ``event_data`` and rewrite the event log file.

    Args:
        event_info: The event dict to record; it is stored as given.
    """
    event_data.append(event_info)
    # Persist after every event; save_event_data() rewrites the whole file.
    save_event_data()
def record_click_event(click_type, pos, button):
    """Take a screenshot and record a click event.

    Args:
        click_type: Value stored under "type" (the callers in this file pass
            "single_click" or "double_click").
        pos: Indexable pair; ``pos[0]`` is stored as x and ``pos[1]`` as y.
        button: Button name stored under "button".
    """
    shot_path = take_screenshot()
    stamp = datetime.now().isoformat()
    record_event({
        "type": click_type,
        "time": stamp,
        "button": button,
        "coordinates": dict(x=pos[0], y=pos[1]),
        "screenshot": shot_path,
    })
    logger.info(f"{click_type.capitalize()} at {pos}, button={button}")
def record_drag_event(start_pos, end_pos, path):
    """Take a screenshot and record a ``drag`` event.

    Args:
        start_pos: Indexable pair giving the x/y where the drag started.
        end_pos: Indexable pair giving the x/y where the drag ended.
        path: The move samples collected during the drag; stored as given.
    """
    shot_path = take_screenshot()
    stamp = datetime.now().isoformat()
    origin = dict(x=start_pos[0], y=start_pos[1])
    destination = dict(x=end_pos[0], y=end_pos[1])
    record_event({
        "type": "drag",
        "time": stamp,
        "start_coordinates": origin,
        "end_coordinates": destination,
        "path": path,
        "screenshot": shot_path,
    })
    logger.info(f"Drag from {start_pos} to {end_pos} recorded.")
def record_scroll_event(x, y, dx, dy):
    """Take a screenshot and record a ``scroll`` event.

    Args:
        x: Pointer x coordinate reported with the scroll.
        y: Pointer y coordinate reported with the scroll.
        dx: Horizontal scroll amount as reported by the listener.
        dy: Vertical scroll amount as reported by the listener.
    """
    shot_path = take_screenshot()
    stamp = datetime.now().isoformat()
    record_event({
        "type": "scroll",
        "time": stamp,
        "coordinates": dict(x=x, y=y),
        "scroll": dict(dx=dx, dy=dy),
        "screenshot": shot_path,
    })
    logger.info(f"Scroll at ({x},{y}) dx={dx}, dy={dy}")
def finalize_single_click():
    """Timer callback: record the pending click as a ``single_click``.

    The pending click is claimed and cleared while holding ``lock``. If
    nothing is pending the function does nothing.
    """
    global pending_click_event, pending_click_timer
    with lock:
        claimed, pending_click_event = pending_click_event, None
        if claimed is not None:
            pending_click_timer = None
    if not claimed:
        return
    logger.debug("Finalize single click event due to timeout.")
    record_click_event("single_click", claimed["pos"], claimed["button"])
def cancel_pending_single_click():
    """Cancel the pending single-click timer, if any, and clear the pending click.

    Acquires ``lock`` itself. ``lock`` is a plain ``Lock``, so this must not be
    called by code that already holds it.
    """
    global pending_click_event, pending_click_timer
    with lock:
        timer, pending_click_timer = pending_click_timer, None
        pending_click_event = None
        if timer:
            timer.cancel()
def schedule_single_click(pos, button):
    """Hold a click as pending until the double-click window has passed.

    Any previously pending click is dropped and its timer cancelled. A new
    Timer then calls ``finalize_single_click`` after
    ``double_click_threshold`` seconds unless it is cancelled first.

    Args:
        pos: The (x, y) position of the click.
        button: The button name of the click.
    """
    global pending_click_event, pending_click_timer
    with lock:
        logger.debug("Scheduling single click event waiting for double click threshold...")
        # Cancel inline instead of calling cancel_pending_single_click(): that
        # helper acquires `lock` itself, and `lock` is a non-reentrant Lock, so
        # calling it while holding the lock blocks this thread forever.
        if pending_click_timer is not None:
            pending_click_timer.cancel()
        pending_click_event = {"pos": pos, "button": button}
        pending_click_timer = Timer(double_click_threshold, finalize_single_click)
        pending_click_timer.start()
def on_click(x, y, button, pressed):
    """Mouse button callback: classify each release as a drag, click or double click.

    A press only stores the start position and resets the drag path. On
    release, if the pointer moved more than ``click_distance_threshold``
    from the press position, a drag is recorded. Otherwise the release is a
    click: a second click with the same button within
    ``double_click_threshold`` seconds is recorded as a double click, and any
    other click is scheduled as a pending single click.

    Args:
        x: Pointer x coordinate.
        y: Pointer y coordinate.
        button: The pynput button value.
        pressed: True for a press, False for a release.
    """
    global mouse_pressed, drag_path, drag_start, drag_button, press_time
    global last_click_time, last_click_pos, last_click_button
    btn_name = button_name_from_pynput(button)
    if pressed:
        mouse_pressed = True
        drag_start = (x, y)
        drag_button = btn_name
        drag_path = []
        press_time = time.time()
        return

    mouse_pressed = False
    if drag_start is None:
        # A release with no recorded press (e.g. the button was already held
        # when the listener started) has no start position to measure from.
        logger.debug("Ignoring mouse release without a recorded press.")
        return

    dx = x - drag_start[0]
    dy = y - drag_start[1]
    distance = (dx*dx + dy*dy)**0.5
    if distance > click_distance_threshold:
        record_drag_event(drag_start, (x, y), drag_path)
        return

    current_time = time.time()
    # NOTE(review): only the time gap and the button are compared here;
    # last_click_pos is stored but not checked - confirm that is intended.
    is_double_click = (
        last_click_time is not None
        and (current_time - last_click_time) <= double_click_threshold
        and last_click_button == btn_name
    )
    if is_double_click:
        # Double click: drop the pending single click and record once.
        logger.debug("Double click detected.")
        cancel_pending_single_click()
        record_click_event("double_click", (x, y), btn_name)
        last_click_time = None
        last_click_pos = None
        last_click_button = None
    else:
        # Single click candidate: wait for a possible second click.
        schedule_single_click((x, y), btn_name)
        last_click_time = current_time
        last_click_pos = (x, y)
        last_click_button = btn_name
def on_move(x, y):
    """Mouse move callback: append a timestamped sample to ``drag_path`` while a button is held.

    Args:
        x: Pointer x coordinate.
        y: Pointer y coordinate.
    """
    if not mouse_pressed:
        return
    sample = {"x": x, "y": y, "time": datetime.now().isoformat()}
    drag_path.append(sample)
def on_scroll(x, y, dx, dy):
    """Mouse scroll callback: record every scroll notification as its own event.

    Args:
        x: Pointer x coordinate.
        y: Pointer y coordinate.
        dx: Horizontal scroll amount.
        dy: Vertical scroll amount.
    """
    record_scroll_event(x, y, dx, dy)
def on_press(key):
    """Keyboard press callback: route the key to the matching handler.

    Modifier keys update the modifier flags. Printable characters are
    checked for a ctrl shortcut first and are otherwise buffered as text.
    Everything else goes to ``handle_special_key``. After handling, esc
    pressed while ctrl and alt are held triggers the exit.

    Args:
        key: The pynput key passed by the listener.
    """
    modifier_keys = [keyboard.Key.ctrl_l, keyboard.Key.ctrl_r,
                     keyboard.Key.shift, keyboard.Key.shift_r,
                     keyboard.Key.alt_l, keyboard.Key.alt_r]
    if key in modifier_keys:
        handle_modifiers(key, True)
    else:
        if key == keyboard.Key.space:
            # Key.space has no .char, so spaces used to be routed to
            # handle_special_key() and dropped from the recorded text.
            key_char = " "
        else:
            # Read the attribute without a try block: wrapping the handlers
            # in `except AttributeError` hid unrelated AttributeErrors
            # raised inside them and re-handled the key as a special key.
            key_char = getattr(key, "char", None)
        if key_char and key_char.isprintable():
            if handle_combination(key_char):
                return
            handle_normal_char(key_char)
        else:
            handle_special_key(key)
    # Exit on ctrl+alt+esc.
    if key == keyboard.Key.esc and ctrl_pressed and alt_pressed:
        check_exit_condition()
def on_release(key):
    """Keyboard release callback: clear the flag of a released modifier key.

    Args:
        key: The pynput key passed by the listener.
    """
    special = keyboard.Key
    tracked_modifiers = (
        special.ctrl_l, special.ctrl_r,
        special.shift, special.shift_r,
        special.alt_l, special.alt_r,
    )
    if key not in tracked_modifiers:
        return
    handle_modifiers(key, False)
class RedBorderWindow(QWidget):
    """Frameless, always-on-top overlay that paints a red border on the primary screen.

    The widget is translucent and transparent to mouse events, and is masked
    down to a frame so that only the border strip belongs to it.
    """

    def __init__(self):
        """Configure the overlay flags, size it to the primary screen and show it full screen."""
        super().__init__()
        window_flags = Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint | Qt.Tool
        self.setWindowFlags(window_flags)
        for attribute in (Qt.WA_TranslucentBackground, Qt.WA_TransparentForMouseEvents):
            self.setAttribute(attribute, True)
        primary_screen = QGuiApplication.primaryScreen()
        self.setGeometry(primary_screen.geometry())
        self.showFullScreen()

    def paintEvent(self, event):
        """Draw the 10-wide red border and mask the widget to the frame area."""
        thickness = 10
        half = int(thickness / 2)
        w = self.width()
        h = self.height()

        painter = QPainter(self)
        painter.setRenderHint(QPainter.Antialiasing)
        border_pen = QPen(QColor("red"))
        border_pen.setWidth(thickness)
        painter.setPen(border_pen)
        # The rectangle is inset by half the pen width on every side.
        border_rect = QRect(half, half, int(w - thickness), int(h - thickness))
        painter.drawRect(border_rect)

        # Keep only the frame: the full area minus the inner rectangle.
        # NOTE(review): the mask is re-applied on every paint - confirm this
        # does not trigger extra repaints on the target platforms.
        whole_area = QRegion(0, 0, w, h)
        hole = QRegion(thickness, thickness, w - 2*thickness, h - 2*thickness)
        self.setMask(whole_area.subtracted(hole))
def start_listeners():
    """Create and start the mouse and keyboard listeners, then block until both end.

    The listeners are stored in the module-level ``mouse_listener`` and
    ``keyboard_listener`` so that other code can stop them.
    """
    global mouse_listener, keyboard_listener
    logger.info("Starting mouse and keyboard listeners...")
    mouse_callbacks = dict(on_click=on_click, on_move=on_move, on_scroll=on_scroll)
    keyboard_callbacks = dict(on_press=on_press, on_release=on_release)
    mouse_listener = mouse.Listener(**mouse_callbacks)
    keyboard_listener = keyboard.Listener(**keyboard_callbacks)
    running = (mouse_listener, keyboard_listener)
    for listener in running:
        listener.start()
    for listener in running:
        listener.join()
def main():
    """Start the overlay and the input listeners, run the Qt event loop, then exit.

    ``app`` and ``red_border_window`` are stored as module globals because
    the screenshot and shutdown code read them.
    """
    global app, red_border_window
    logger.info("Program started.")
    app = QApplication(sys.argv)
    red_border_window = RedBorderWindow()
    red_border_window.show()
    # The listeners block in join(), so they run on their own thread.
    listener_thread = Thread(target=start_listeners)
    listener_thread.start()
    result = app.exec_()
    logger.info("Program exited with code {}".format(result))
    # Force the process to end once the main thread leaves the event loop.
    os._exit(0)


if __name__ == "__main__":
    main()
摘要:
在AI模型的开发过程中,开发效率常常成为制约其发展的一个重要因素。AI模型开发与传统产品开发过程有许多相似之处,同样可以从中台技术带来的效率提升中受益。通过对开发过程的各个阶段进行抽象化,并引入标准化的开发流程、维护机制和辅助工具,可以显著加快模型的迭代速度。这样,模型开发就可以从简单的、各自为战的 阅读全文
摘要:
最近我在刷知乎的时候关注到了Meta的一个新工作CoPE(Contextual Position Encoding,上下文位置编码),在了解了其中的核心理念和实现后,我不自觉地联想到了Deformable attention,然后尝试将两者的相似点进行了一点整理。 为什么需要CoPE? 在处理文本序 阅读全文
摘要:
引言 自动驾驶技术正在迅速发展,但要实现真正的自主驾驶,模型必须能够复杂且准确地模拟人类司机的行为。通过最近的实验,我先探索了基本的模仿学习,然后进一步探索了逆强化学习等方法,目的是让自动驾驶模型不仅能模仿,更能深入理解驾驶的决策过程。 模仿学习的初步尝试 在进行VLM端到端自动驾驶系统实验时,我和我 阅读全文
摘要:
在我深入研究大型语言模型时,我意识到特殊Token不仅仅是数据元素,它们在模型中扮演着关键角色,帮助理解和处理语言结构。举个例子,BERT的CLS标记帮助模型把握整个句子的含义,Memory Transformer的记忆Token则让模型能持续跟踪对话的上下文,而Meta最近提出的Register 阅读全文
摘要:
引言 在算法优化的世界中,理解所面对的任务不仅是起点,也是整个优化过程的核心。在这篇博客中,我将分享我在算法训练和优化中的一些经验,以及一个关于场景流估计的项目中应用的案例。我希望这些经验能帮助你在未来的项目中取得更好的成绩。 1. 深入理解任务和数据 理解算法项目的独特目标和挑战是优化的第一步。明 阅读全文
摘要:
简化 Python 日志管理:Loguru 入门指南 在开发和维护软件项目时,高效的日志管理系统对于监控应用程序的行为、调试代码和追踪异常至关重要。Python 的标准日志模块虽然功能强大,但其配置和使用往往较为复杂,尤其是对于新手开发者。这就是 Loguru 库发挥作用的地方,它以极简的方式重新定 阅读全文
摘要:
在编程世界中,效率是王道。对于Python开发者来说,line_profiler 是一把锐利的剑,能够深入代码的每一行,找出性能瓶颈。今天,就让我们一起深入探索 line_profiler,学习如何用它为你的Python程序注入强心剂,让代码效率飞跃。 line_profiler:性能分析的利器 l 阅读全文
摘要:
元 素 法 典 第二卷 https://docs.qq.com/doc/DWEpNdERNbnBRZWNL 阅读全文
摘要:
亲爱的码农小伙伴们,你们是否还在为Tensor的各种变换头大如斗?别怕,今天给大家送上一张超实用的PyTorch变换秘籍图,让你的Tensor操作如行云流水,在CPU和GPU之间切换自如! 🚀 GPU上的Tensor怎么变成CPU上的?一招搞定! 就是这么简单粗暴——.cpu(),一个方法,轻轻 阅读全文