win系统下各种版本的simstring安装
------------simstring(original)---------------------
-
package url: http://www.chokkan.org/software/simstring/
-
author: chokkan
-
完全由C++编写
------------simstring-fast--------------------------
-
package url: https://github.com/banking-circle-advanced-analytics/simstring-fast
-
完全由Python编写
-
Install:
pip install simstring-fast
------------simstring-pure--------------------------
-
package url: https://github.com/nullnull/simstring
-
完全由Python编写
-
Install:
pip install simstring-pure
------------simstring--------------------------------
-
package url: https://github.com/Georgetown-IR-Lab/simstring
-
Install: 需要C++开发工具,下载Visual Studio并安装C++桌面开发工具
-
创建环境
mamba create -n simstring_test python==3.10
mamba activate simstring_test
- 修改下载源
conda config --set show_channel_urls yes
# 手动修改`C:\Users\用户名\.condarc` 文件,替换为以下内容:
channels:
  - defaults
show_channel_urls: true
default_channels:
  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r
  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2
custom_channels:
  conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
  msys2: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
  bioconda: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
  menpo: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
  pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
  pytorch-lts: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
  simpleitk: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
  deepmodeling: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/
- 正式安装
conda clean -i # 清除缓存
mamba install -c conda-forge libiconv # 安装依赖
# 在手动安装simstring之前,解压缩simstring-master,并找到setup.py文件,在install()函数的matcher下一行添加:
pyd_file = '_simstring.cp310-win_amd64.pyd'
# 310代表python解释器的版本,注意根据不同版本进行修改,python3.9就是39
# 在终端中cd到simstring-master文件夹下,执行:
python setup.py build
# 返回:
# 正在创建库 build\temp.win-amd64-cpython-310\Release\quickumls_simstring\_simstring.cp310-win_amd64.lib 和对象 build\temp.win-amd64-cpython-310\Release\quickumls_simstring\_simstring.cp310-win_amd64.exp
# 正在生成代码
# 已完成代码的生成
python setup.py install
# 返回:
# Manually copying a Windows PYD from build\lib.win-amd64-cpython-310\_simstring.cp310-win_amd64.pyd to build\bdist.win-amd64\egg\quickumls_simstring\_simstring.cp310-win_amd64.pyd
# zip_safe flag not set; analyzing archive contents...
# __pycache__._simstring.cpython-310: module references __file__
# quickumls_simstring.__pycache__.simstring.cpython-310: module references __file__
# 此时已安装成功
# 测试:
import quickumls_simstring.simstring as simstring
db = simstring.writer('sample.db')
db.insert('Barack Hussein Obama II')
db.insert('James Gordon Brown')
db.close()
db = simstring.reader('sample.db')
db.measure = simstring.cosine
db.threshold = 0.6
print(db.retrieve('Barack Obama')) # OK.
print(db.retrieve('Gordon Brown')) # OK.
print(db.retrieve('Obama'))