import imghdr
import time
from io import BytesIO
import pyarrow.parquet as pq
from PIL import Image
def resize_image(img, max_side_length: int = 768):
    """Scale an image so that its longest side becomes ``max_side_length``.

    The aspect ratio is preserved. Scaling is applied in both directions
    (images smaller than the target are enlarged), so a caller that only
    wants to shrink must check the size before calling.

    Args:
        img: Object exposing ``size`` as ``(width, height)`` and a
            ``resize((width, height))`` method, e.g. a PIL image.
        max_side_length: Target length, in pixels, of the longest side.

    Returns:
        The result of ``img.resize`` for the computed dimensions.
    """
    width, height = img.size
    print("原:宽X高", width, "x", height)
    # Single factor for both axes keeps the aspect ratio.
    scale_factor = max_side_length / max(width, height)
    # int() truncates, so the short side of a very elongated image could
    # become 0, which resize() rejects; keep every side at least 1 pixel.
    new_width = max(1, int(width * scale_factor))
    new_height = max(1, int(height * scale_factor))
    return img.resize((new_width, new_height))
# Script: read image bytes from a parquet file, and for every image whose
# longest side reaches the threshold, save a downscaled copy next to a copy
# of the original in the current working directory.

# Longest-side threshold in pixels: smaller images are skipped, the rest are
# scaled so that their longest side equals this value.
MAX_SIDE_LENGTH = 768

file1 = "/Users/chennan/Downloads/2.parquet"
df = pq.read_pandas(file1).to_pandas()
# NOTE(review): .loc slicing is label-based and inclusive of the end label,
# so with a default integer index this selects 101 rows — confirm whether
# exactly 100 rows were intended.
select_df = df.loc[:100]
for position, (_, row) in enumerate(select_df.iterrows()):
    # Despite the column name, the 'URL' value is used as raw image bytes.
    image_bytes = row['URL']
    buff = BytesIO(image_bytes)
    img = Image.open(buff)
    if max(img.size) < MAX_SIDE_LENGTH:
        continue
    resize_img = resize_image(img, MAX_SIDE_LENGTH)
    # Rewind so the type sniffing below reads the header from the start.
    buff.seek(0)
    # imghdr returns None for data it cannot identify, which would produce a
    # ".None" extension that save() cannot map to a format; fall back to the
    # format Pillow detected when opening the image.
    img_type = imghdr.what(buff) or img.format.lower()
    print(resize_img.size, img_type)
    timestamp = int(time.time())
    # The timestamp has one-second resolution, so several images handled in
    # the same second would overwrite each other; the row position keeps
    # every output name unique within a run.
    resize_img.save(f"{timestamp}_{position}.{img_type}")
    img.save(f"原_{timestamp}_{position}.{img_type}")
    # NOTE(review): pause kept from the original; its purpose is not evident
    # from this file (presumably throttling) — confirm before removing.
    time.sleep(0.1)