安装:
pip install sqlalchemy
# 安装数据库驱动:
pip install pymysql
pip install cx_oracle
举例:(在url后面加入?charset=utf8可以防止乱码)
from sqlalchemy import create_engine
engine=create_engine('mysql+pymysql://username:password@hostname:port/dbname', echo=True) #echo=True 打印sql语句信息
create_engine
接受一个url,格式为:
# '数据库类型+数据库驱动名称://用户名:口令@机器地址:端口号/数据库名'
# 常用的
engine = create_engine('sqlite:///:memory:', echo=True) # sqlite内存
engine = create_engine('sqlite:///./cnblogblog.db',echo=True) # sqlite文件
engine = create_engine("mysql+pymysql://username:password@hostname:port/dbname",echo=True) # mysql+pymysql
engine = create_engine('mssql+pymssql://username:password@hostname:port/dbname',echo=True) # mssql+pymssql
engine = create_engine('postgresql://scott:tiger@hostname:5432/dbname') # postgresql示例
engine = create_engine('oracle://scott:tiger@hostname:1521/sidname') # oracle
engine = create_engine('oracle+cx_oracle://scott:tiger@tnsname') #pdb就可以用tns连接
简单demo:
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
engine = create_engine('oracle://spark:a@orclpdb',echo=True) #echo要求打印sql语句等调试信息
session_maker = sessionmaker(bind=engine)
session = session_maker()
Base = declarative_base()
#对应一张表
class Student(Base):
__tablename__ = 'STUDENT'
id = Column('STUID', Integer, primary_key=True)
name = Column('STUNAME', String(32), nullable=False)
age = Column('STUAGE', Integer)
def __repr__(self):
return '<Student(id:%s, name:%s, age:%s)>' % (self.id, self.name, self.age)
Base.metadata.create_all(engine) #若存在STUDENT表则不做,不存在则创建。
queryObject = session.query(Student).order_by(Student.id.desc())
for ins in queryObject:
print(ins.id, ins.name, ins.age)
'''
4 hey 24
3 lwtxxs 27
2 gyb 89
1 ns 23
'''
将查询结果映射为DataFrame:
import pandas as pd
df = pd.read_sql(session.query(Student).filter(Student.id > 1).statement, engine)
print(df)
'''
STUID STUNAME STUAGE
0 4 hey 24
1 2 gyb 89
2 3 lwtxxs 27
'''
查询:
session的query方法除了可以接受Base子类对象作为参数外,还可以是字段,如:
query = session.query(Student.name, Student.age) # query为一个sqlalchemy.orm.query.Query对象
for stu_name, stu_age in query:
print(stu_name, stu_age)
查询条件filter:
# = / like
query.filter(Student.name == 'wendy')
query.filter(Student.name.like('%ed%'))
# in
query.filter(Student.name.in_(['wendy', 'jack']))
query.filter(Student.name.in_(
session.query(User.name).filter(User.name.like('%ed%'))
))
# not in
query.filter(~Student.name.in_(['ed', 'wendy', 'jack']))
# is null / is not null
query.filter(Student.name == None)
query.filter(Student.name.is_(None))
query.filter(Student.name != None)
query.filter(Student.name.isnot(None))
# and
from sqlalchemy import and_, or_
query.filter(and_(Student.name == 'ed', Student.age != 23))
query.filter(Student.name == 'ed', Student.age != 23)
query.filter(Student.name == 'ed').filter(Student.age != 23)
# or
query.filter(or_(Student.name == 'ed', Student.name == 'wendy'))
# match
query.filter(Student.name.match('wendy'))
Query的方法:
all()
方法以列表形式返回结果集:
from sqlalchemy import or_, and_
queryObject = session.query(Student).filter(or_(Student.id == 1, Student.id == 2))
print(queryObject.all()) # [<Student(id:1, name:ns, age:23)>, <Student(id:2, name:gyb, age:89)>]
queryObject = session.query(Student.name).filter(or_(Student.id == 1, Student.id == 2))
print(queryObject.all()) # [('ns',), ('gyb',)]
first()
方法返回单个结果。(若结果集为空则返回None)
print``(queryObject.first()) ``# ('ns',)
one()
方法返回单个结果,与first()
方法不同的是:当结果集中没有元素或有多于一个元素会抛出异常。 one_or_none()
方法同one()
一样,不同是结果集为空则返回None,为多个抛出异常。
查询数量:
from sqlalchemy import func session.query(func.count(Student.id)).scalar() # SELECT count("STUDENT"."STUID") AS count_1 FROM "STUDENT"
分组:
session.query(func.count(Student.id), Student.name).group_by(Student.name).all()
嵌套SQL语句:
from sqlalchemy import text query = session.query(Student.id, Student.name).filter(text('stuid>2')) query = session.query('stuid', 'stuname', 'stuage').from_statement(\ text("select * from student where stuname=:stuname")).params(stuname='hey').all() #[(4, 'hey', 24)]
sqlalchemy.exc.DatabaseError: (cx_Oracle.DatabaseError) ORA-12505: TNS: 监听程序当前无法识别连接描述符中所给出的 SID 主要原因可能是目标数据库是集群部署,可以咨询一下DBA python 用sqlalchemy 连接Oracle数据库的时候报了下面这个错误:
sqlalchemy.exc.DatabaseError: (cx_Oracle.DatabaseError) ORA-12505: TNS: 监听程序当前无法识别连接描述符中所给出的 SID (Background on this error at: http://sqlalche.me/e/4xp6)
这是因为 sqlalchemy 在create_engine的时候默认是调用cx_Oracle去连接数据库,而cx_Oracle 在创建dns连接字符串的时候是默认SID = tnsname (实例名),其实是在连接的时候调用了 cx_Oracle.makedns 来构造连接url,我们通过下面的例子来看
In[95]: import cx_Oracle In[96]: cx_Oracle.makedsn('10.24.04.19', '1314', 'report') Out[96]: '(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST=10.24.04.19)(PORT=1314))(CONNECT_DATA=(SID=report)))' In[97]: cx_Oracle.makedsn('10.24.04.19', '1314', service_name='report') Out[97]: '(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST=10.24.04.19)(PORT=1314))(CONNECT_DATA=(SERVICE_NAME=report)))'
因为cx_Oracle不会去读我们配置的tnsname.ora文件,而是通过传进去的参数来构造连接url 所以如果不指定service_name,那么这个函数就会默认将 ‘report’ 视为 SID (positional args),这样做的话对于单机部署的 Oracle数据库是没有问题的,但是如果目标数据库是集群部署的话,就会出现ORA-12505: TNS: 监听程序当前无法识别连接描述符中所给出的 SID 的情况。 更深入的解释可以看下面 由于oracle 是做的多节点,然后有一个公用的service_name,只有通过service_name去连接才能起到负载均衡的作用,而以cx_Oracle默认的连接串去连的话只能连接到实例名,而不能连接到service_name,所以oracle用service_name去匹配实例名,当然找不到。所以连接时必须指定连的是service_name而不是sid. 所以我们需要修改连接字符串 SQLAlchemy 连接方式
import cx_Oracle from sqlalchemy import create_engine ip = '10.24.04.19' port = '1314' uname = 'jiajia' # 用户名 pwd = 'yupeng' # 密码 tnsname = 'report' # 实例名 dsnStr = cx_Oracle.makedsn(ip, port, service_name=tnsname) connect_str = "oracle://%s:%s@%s" %(uname, pwd, dsnStr) engine = create_engine(connect_str, encoding=encoding)
cx_Oracle连接方法
conn = cx_Oracle.connect(uname, pwd, dsn=dsnStr)
本文实例讲述了Python使用sqlalchemy模块连接数据库操作。分享给大家供大家参考,具体如下:
安装:
pip install sqlalchemy # 安装数据库驱动: pip install pymysql pip install cx_oracle
举例:(在url后面加入?charset=utf8可以防止乱码)
from sqlalchemy import create_engine engine=create_engine('mysql+pymysql://username:password@hostname:port/dbname', echo=True) #echo=True 打印sql语句信息
create_engine
接受一个url,格式为:
# '数据库类型+数据库驱动名称://用户名:口令@机器地址:端口号/数据库名' # 常用的 engine = create_engine('sqlite:///:memory:', echo=True) # sqlite内存 engine = create_engine('sqlite:///./cnblogblog.db',echo=True) # sqlite文件 engine = create_engine("mysql+pymysql://username:password@hostname:port/dbname",echo=True) # mysql+pymysql engine = create_engine('mssql+pymssql://username:password@hostname:port/dbname',echo=True) # mssql+pymssql engine = create_engine('postgresql://scott:tiger@hostname:5432/dbname') # postgresql示例 engine = create_engine('oracle://scott:tiger@hostname:1521/sidname') # oracle engine = create_engine('oracle+cx_oracle://scott:tiger@tnsname') #pdb就可以用tns连接
简单demo:
from sqlalchemy import create_engine, Column, Integer, String from sqlalchemy.orm import sessionmaker from sqlalchemy.ext.declarative import declarative_base engine = create_engine('oracle://spark:a@orclpdb',echo=True) #echo要求打印sql语句等调试信息 session_maker = sessionmaker(bind=engine) session = session_maker() Base = declarative_base() #对应一张表 class Student(Base): __tablename__ = 'STUDENT' id = Column('STUID', Integer, primary_key=True) name = Column('STUNAME', String(32), nullable=False) age = Column('STUAGE', Integer) def __repr__(self): return '<Student(id:%s, name:%s, age:%s)>' % (self.id, self.name, self.age) Base.metadata.create_all(engine) #若存在STUDENT表则不做,不存在则创建。 queryObject = session.query(Student).order_by(Student.id.desc()) for ins in queryObject: print(ins.id, ins.name, ins.age) ''' 4 hey 24 3 lwtxxs 27 2 gyb 89 1 ns 23 '''
将查询结果映射为DataFrame:
import pandas as pd df = pd.read_sql(session.query(Student).filter(Student.id > 1).statement, engine) print(df) ''' STUID STUNAME STUAGE 0 4 hey 24 1 2 gyb 89 2 3 lwtxxs 27 '''
查询:
session的query方法除了可以接受Base子类对象作为参数外,还可以是字段,如:
query = session.query(Student.name, Student.age) # query为一个sqlalchemy.orm.query.Query对象 for stu_name, stu_age in query: print(stu_name, stu_age)
查询条件filter:
# = / like query.filter(Student.name == 'wendy') query.filter(Student.name.like('%ed%')) # in query.filter(Student.name.in_(['wendy', 'jack'])) query.filter(Student.name.in_( session.query(User.name).filter(User.name.like('%ed%')) )) # not in query.filter(~Student.name.in_(['ed', 'wendy', 'jack'])) # is null / is not null query.filter(Student.name == None) query.filter(Student.name.is_(None)) query.filter(Student.name != None) query.filter(Student.name.isnot(None)) # and from sqlalchemy import and_, or_ query.filter(and_(Student.name == 'ed', Student.age != 23)) query.filter(Student.name == 'ed', Student.age != 23) query.filter(Student.name == 'ed').filter(Student.age != 23) # or query.filter(or_(Student.name == 'ed', Student.name == 'wendy')) # match query.filter(Student.name.match('wendy'))
Query的方法:
all()
方法以列表形式返回结果集:
from sqlalchemy import or_, and_ queryObject = session.query(Student).filter(or_(Student.id == 1, Student.id == 2)) print(queryObject.all()) # [<Student(id:1, name:ns, age:23)>, <Student(id:2, name:gyb, age:89)>] queryObject = session.query(Student.name).filter(or_(Student.id == 1, Student.id == 2)) print(queryObject.all()) # [('ns',), ('gyb',)]
first()
方法返回单个结果。(若结果集为空则返回None)
print(queryObject.first()) # ('ns',)
one()
方法返回单个结果,与first()
方法不同的是:当结果集中没有元素或有多于一个元素会抛出异常。 one_or_none()
方法同one()
一样,不同是结果集为空则返回None,为多个抛出异常。
查询数量:
from sqlalchemy import func session.query(func.count(Student.id)).scalar() # SELECT count("STUDENT"."STUID") AS count_1 FROM "STUDENT"
分组:
session.query(func.count(Student.id), Student.name).group_by(Student.name).all()
嵌套SQL语句:
from sqlalchemy import text query = session.query(Student.id, Student.name).filter(text('stuid>2')) query = session.query('stuid', 'stuname', 'stuage').from_statement(\ text("select * from student where stuname=:stuname")).params(stuname='hey').all() #[(4, 'hey', 24)]
基本查询结果
# 1 查看sql原生语句 rs =session.query(User).filter(User.username=='budong') print(rs) # 2 query(module) .all() rs =session.query(User).filter(User.username=='budong').all() # .all list print(rs, type(rs[0])) # 索引取值 当query(module) 类型为User类的实例对象 print(rs[0].username,rs[0].id) # rs[0]这个实例对象通过.username,.id取得值
# 3 hasattr() getattr() # 没有这条数据则会报错超出索引 先判断是否存在hasattr() ,再取值getattr()数据不存在报错 print(hasattr(rs[0], 'username')) # 判断是否有这个username属性 返回值True False if hasattr(rs[0], 'username'): print(getattr(rs[0],'username')) # 安全取值 print(rs[0].username) >>> True >>> budong >>> budong
# 4 .first() [0] rs =session.query(User).filter(User.username=='budong').first() # 返回一条数据,无则返回none rs1 =session.query(User).filter(User.username=='budong')[0] # 取第一条数据无则报错 print(rs, rs1,sep='\n') print(rs.id, rs1.username) # 取出值 if rs != None: print(rs) >>> <User(id=1,username=budong,password=qwe123,createtime=2018-03-07 16:57:09)> <User(id=1,username=budong,password=qwe123,createtime=2018-03-07 16:57:09)> >>> 1 budong >>> <User(id=1,username=budong,password=qwe123,createtime=2018-03-07 16:57:09)>
# 5 query(module的属性) rs =session.query(User.id).filter(User.username=='tj').all() # all返回list print(rs) # list print(rs[0]) # 当query(module的属性) 返回结果为元祖 print(rs[0][0]) >>> [(2,)] >>> (2,) >>> 2
# 6 条件查询 # filter_by(直接跟module的属性,以字典形式传参) 并且只能判断 = rs = session.query(User).filter_by(username='budong').all() print(rs) # filter(module.属性) 能判断 == != >= 常用 rs = session.query(User).filter(User.username=='budong').all() print(rs) >>> [<User(id=1,username=budong,password=qwe123,createtime=2018-03-07 16:57:09)>, <User(id=5,username=budong,password=qweq,createtime=2018-03-08 05:10:38)>] >>> [<User(id=1,username=budong,password=qwe123,createtime=2018-03-07 16:57:09)>, <User(id=5,username=budong,password=qweq,createtime=2018-03-08 05:10:38)>]
模糊查询
# 7 模糊查询 # like_ notlike rs = session.query(User).filter(User.username.like('%don%')).all() print(rs) rs = session.query(User).filter(User.username.notlike('%don%')).all() # 相反 print(rs) >>> [<User(id=1,username=budong,password=qwe123,createtime=2018-03-07 16:57:09)>, <User(id=5,username=budong,password=qweq,createtime=2018-03-08 05:10:38)>] >>> [<User(id=2,username=tj,password=123qwe1,createtime=2018-03-07 16:54:09)>, <User(id=3,username=tj1,password=123qwe2,createtime=2018-03-07 16:58:09)>, <User(id=4,username=tj2,password=123qwe3,createtime=2018-03-07 16:07:09)>]
# 8 in_ notin 满足一个条件即可rs = session.query(User).filter(User.username.in_(['budong','tj'])).all() print(rs) rs = session.query(User).filter(User.username.notin_(['budong','tj'])).all() # 相反 print(rs) >>> [<User(id=1,username=budong,password=qwe123,createtime=2018-03-07 16:57:09)>, <User(id=2,username=tj,password=123qwe1,createtime=2018-03-07 16:54:09)>, <User(id=5,username=budong,password=qweq,createtime=2018-03-08 05:10:38)>] >>> [<User(id=3,username=tj1,password=123qwe2,createtime=2018-03-07 16:58:09)>, <User(id=4,username=tj2,password=123qwe3,createtime=2018-03-07 16:07:09)>]
# 9 is_ isnot is 用来判断是否为空 是空则取值 rs = session.query(User.username).filter(User.username.is_(None)).all() print(rs) rs = session.query(User.username).filter(User.username.isnot(None)).all() # 相反 print(rs) >>> [] >>> [('budong',), ('tj',), ('tj1',), ('tj2',), ('budong',)]
# 10 limit 限制数据条数 rs =session.query(User).filter(User.username=='budong').all() print(rs) rs =session.query(User).filter(User.username=='budong').limit(1).all() print(rs) >>> [<User(id=1,username=budong,password=qwe123,createtime=2018-03-07 16:57:09)>, <User(id=5,username=budong,password=qweq,createtime=2018-03-08 05:10:38)>] >>> [<User(id=1,username=budong,password=qwe123,createtime=2018-03-07 16:57:09)>]
# 11 offset(n) 不取前n条数据 称为 偏移量:偏移n条数据 rs = session.query(User).filter(User.username=='budong').offset(1).all() print(rs) >>> [<User(id=5,username=budong,password=qweq,createtime=2018-03-08 05:10:38)>] # 12 slice 切片 左闭右开 rs = session.query(User).filter(User.username=='budong').slice(0,1).all() print(rs) >>> [<User(id=1,username=budong,password=qwe123,createtime=2018-03-07 16:57:09)>] # 13 one 只有一条数据则取值 反之 报错 # rs = session.query(User).filter(User.username=='budong').one() # 满足条件的超过1条,报错 rs = session.query(User).filter(User.username=='tj').one() print(rs) >>> <User(id=2,username=tj,password=123qwe1,createtime=2018-03-07 16:54:09)>
# 14 order_by(*args) 排序(按asc) # 升序 rs = session.query(User.id).filter(User.username=='budong').order_by(User.id).all() print(rs) # 降序 需导入降序desc from sqlalchemy import desc rs = session.query(User.id).filter(User.username=='budong').order_by(desc(User.id)).all() print(rs) >>> [(1,), (5,)] >>> [(5,), (1,)]
# 15 group_by from sqlalchemy import func,extract # 按query的属性 进行分组 再统计该属性的所有值出现的次数 rs = session.query(User.username,func.count(User.id)).group_by(desc(User.username)).all() print(rs) >>> [('tj2', 1), ('tj1', 1), ('tj', 1), ('budong', 2)]
# 16 group_by + having(判断条件 常跟func的count sum avg 等使用) 先分组在执行having rs = session.query(User.username,func.count(User.id)).group_by(desc(User.username)).\ having(func.count(User.id)>1).all() print(rs) rs = session.query(User.username,func.max(User.id)).group_by(User.username).all() print(rs) # 通过username分组 多条数据的取id最大的那条 rs = session.query(User.username,func.min(User.id)).group_by(User.username).all() print(rs) # 通过username分组 多条数据的取id最小的那条 >>> [('budong', 2)] >>> [('budong', 5), ('tj', 2), ('tj1', 3), ('tj2', 4)] >>> [('budong', 1), ('tj', 2), ('tj1', 3), ('tj2', 4)]
# 17 extract 能获取某部分时间(year,month,day,hour,minute,second) 进行分组及统计 rs = session.query(extract('minute',User.creatime).label('minute'),func.count('minute')).\ group_by('minute').all() # label 取别名 print(rs) >>> [(7, 1), (10, 1), (54, 1), (57, 1), (58, 1)]
# 18 or_ 或者 满足其中一个条件即可 类似in_ notin rs = session.query(User.username).filter(or_(User.password=='qwe123',User.id>2)).all() print(rs) >>> [('budong',), ('tj1',), ('tj2',), ('budong',)]
User这个类创建的表 User1这个类创建的表
# 19 多表查询 # mysql中的 内链接cross join 内链接inner join 两者没区别, 内链接的结果会产生笛卡儿积 table1(的每条数据) X table2(的所有数据) rs = session.query(User.username,User1.name).filter(User.id==User1.id).all() # 通过,直接query两张表= select * from table1,table2 属于内链接cross join print(rs) rs = session.query(User.username,User1.name).join(User1,User.id==User1.id).all() # join =内链接inner join print(rs) # mysql中的 外链接left join 和 外链接left outer join也没区别 # 外链接outerjoin = left outer join -- sqlalchemy 没有right outer join rs = session.query(User.username,User1.name).outerjoin(User1,User.id==User1.id).all() print(rs)we # 已左表为准 两个表的数据并排显示,左表有多少条数据则显示多少,右边有多余的数据则不取,少于的数据则显示为None数据链接到左表 rs = session.query(User1.name,User.username).outerjoin(User,User.id==User1.id).all() #与上面相比交换表的位置 print(rs) >>> [('budong', 'D'), ('tj', 'A'), ('tj1', 'B'), ('tj2', 'C')] >>> [('budong', 'D'), ('tj', 'A'), ('tj1', 'B'), ('tj2', 'C')] >>> [('budong', 'D'), ('tj', 'A'), ('tj1', 'B'), ('tj2', 'C'), ('budong', None)] >>> [('D', 'budong'), ('A', 'tj'), ('B', 'tj1'), ('C', 'tj2')]
# 20 联合查询 两个表并排显示 rs1 = session.query(User1.name) rs2 = session.query(User.username) print(rs1.union(rs2).all()) # union 去重 print(rs1.union_all(rs2).all()) # 显示所有包括重复的数据 'budong'为重复的数据 >>> [('D',), ('A',), ('B',), ('C',), ('budong',), ('tj',), ('tj1',), ('tj2',)] >>> [('D',), ('A',), ('B',), ('C',), ('budong',), ('tj',), ('tj1',), ('tj2',), ('budong',)]
# 21 子表查询 cross join 产生笛卡儿积# 原生sql是 select * from table1,table2; table2是这儿的子表 # 声明子表subquery() 子表可以是多个表取出的数据 所以比直接使用 cross join or inner join 能查更多表的相关数据 sql = session.query(User1.name).subquery() # 父表的每一条数据都匹配子表的所有数据 print(session.query(User.username,sql.c.name).all()) # 固定写法 申明子表的sql.c.属性 >>> [('budong', 'D'), ('tj', 'D'), ('tj1', 'D'), ('tj2', 'D'), ('budong', 'D'), ('budong', 'A'), ('tj', 'A'), ('tj1', 'A'), ('tj2', 'A'), ('budong', 'A'), ('budong', 'B'), ('tj', 'B'), ('tj1', 'B'), ('tj2', 'B'), ('budong', 'B'), ('budong', 'C'), ('tj', 'C'), ('tj1', 'C'), ('tj2', 'C'), ('budong', 'C')]
原生sql语句查询
# 原生SQL查询 sql_1='select username from `user`' row = session.execute(sql_1) # row =5条数据 row是一个对象 可以 for in 取值 dir(对象) print(row.fetchone()) # 取出第一条数据 row -1 =4 print(row.fetchmany(2)) # 去出两条数据 row -2 =2 print(row.fetchall()) # 取出所有的数据 row =0 >>> ('budong',) >>> [('tj',), ('tj1',)] >>> [('tj2',), ('budong',)]
sql是字符串 可以用到字符串拼接
sql = ''' select * from user where id<%s; ''' %(3) row = session.execute(sql) for i in row: print(i) # 元祖 >>> (1, 'budong', 'qwe123', datetime.datetime(2018, 3, 7, 16, 57, 9)) >>> (2, 'tj', '123qwe1', datetime.datetime(2018, 3, 7, 16, 54, 9))