duckdb

conda install conda-build anaconda-client
conda create --name duckdb-conda
conda activate duckdb-conda
pip install duckdb
conda skeleton pypi duckdb --version 0.9.1
cd duckdb
conda build .
//不成功直接下载安装whl
https://pypi.tuna.tsinghua.edu.cn/packages/10/32/96e307be43f1653a9e94f729fd48aa869840aeaecbb0d7be84789881c6d5/duckdb-0.9.1-cp310-cp310-win_amd64.whl
//查询依赖
pip install pipdeptree
pipdeptree --packages duckdb  
# %%
import pandas as pd
import glob
import time
import duckdb
# %%
# Open a DuckDB connection (no database path is given); later cells reuse `conn`.
conn = duckdb.connect()
# %%
# Preview the first 10 rows read from the files matching '*.csv' and report how
# long the query plus the conversion to a pandas DataFrame took.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() follows the wall clock, which can be adjusted mid-run.
started = time.perf_counter()
df = conn.execute(
    """
            select * from read_csv_auto('*.csv',header=True)
            limit 10
            """
).df()
print(f"time:{time.perf_counter() - started}")
print(df)
# %%
# Make the DataFrame queryable from SQL under the name "df_view".
conn.register("df_view", df)
# %%
# Ask DuckDB to describe the registered view.
conn.execute("describe df_view").df()
# %%
# Per-column count of missing values. Printed explicitly: as a bare expression
# followed by another statement in the same cell, the result was never shown.
print(df.isnull().sum())
# Drop only the rows in which every column is missing.
df = df.dropna(how="all")
# NOTE(review): "df_view" was registered before this reassignment, so it
# presumably still refers to the unfiltered DataFrame -- confirm that is intended.
# %%
# Row count, referring to `df` by name inside the SQL text.
conn.execute("select count(1) from df").df()
# %%
# Select everything from `df` through a common table expression named `a`.
cte_query = """
with a as (select * from df)
             select * from a
             """
conn.execute(cte_query).df()
# %%
# Copy the rows read from the '*.csv' files into 'aaa.parquet'.
csv_to_parquet = "copy (from read_csv_auto('*.csv',header=True)) to 'aaa.parquet'"
conn.execute(csv_to_parquet)
# %%
# Copy 'aaa.parquet' back out to 'a.csv', comma-delimited with a header row.
parquet_to_csv = "copy (from 'aaa.parquet') to 'a.csv'(DELIMITER ',', HEADER)"
conn.execute(parquet_to_csv)
# %%
# List the tables visible on this connection.
list_tables = "SHOW TABLES;"
conn.execute(list_tables).df()
# %%
import pandas as pd
import glob
import time
import duckdb
import sqlite3
# Read 'a.csv' into a pandas DataFrame through a fresh DuckDB connection.
conn = duckdb.connect()
df = conn.execute(
    """
            select * from read_csv_auto('a.csv',header=True)
            """
).df()
# Write the DataFrame to the SQLite file 'database.db' as table 'table_name'
# (if_exists='replace', and the pandas index is not written as a column).
conn3 = sqlite3.connect('database.db')
try:
    df.to_sql('table_name', conn3, if_exists='replace', index=False)
finally:
    # Release the SQLite handle even when to_sql raises; previously an
    # exception skipped close() and left the connection open.
    conn3.close()
posted @ 2023-10-26 17:29  月渊  阅读(86)  评论(0)  编辑  收藏  举报