小工具
目录
go工具
1.数据库工具
1.1golang-gorm查询
func (o *object) QueryInfoByUid(uid int) (user User) {
if uid <= 0 {
o.Log.Error(qutil.ParamError)
return
}
//err := o.Db.Model(User{}).Where(User{Uid: uid}).Find(&user).Error //方式一
err := o.Db.Model(User{}).Where("id = ?",id).Find(&user).Error // 方式二
if err != nil {
o.Log.Error(err)
}
return
}
type User struct {
id int `json:"id"`
Name string `json:"name"`
}
func (User) TableName() string {
return "users"
}
1.2golang-sql查询
func (o object) QueryInfo(FBillNo string) *Info {
r := new(Info)
var qstr string
switch {
case len(FBillNo) > 0:
qstr += fmt.Sprintf(" and \"FBillNo\" = '%s'", FBillNo)
default:
o.Log.Error("invalid param")
return nil
}
sqlstr := `
select
coalesce("FBillNo",'') as FBillNo,
coalesce(fshortnumber,'') as fshortnumber,
coalesce(fname,'') as fname,
coalesce(fchexing,'') as fchexing,
coalesce(fys,'') as fys
from 表名
where 1=1
`
sqlstr += qstr
err := o.DbRo.QueryRow(sqlstr).Scan(&r.FBillNo, &r.FShortNumber, &r.FName, &r.FCheXing, &r.Fys)
if err != nil {
o.Log.Errorf("param=%s,sql=%s,err=%v", FBillNo, sqlstr, err)
return nil
}
return r
}
1.3golang-批量写入
1
func toTabInsert(rows []*Info) {
db, _ := DB.GORM.DB()
tx, err := db.Begin()
if err != nil {
log.Fatal(err)
}
stmt, err := tx.Prepare("insert into user (xxx) values(?,?,?,?,?,?)")
if err != nil {
log.Fatal(err)
}
for _, row := range rows {
if _, err := stmt.Exec(xxx); err != nil {
log.Fatal(err)
}
}
_ = tx.Commit()
}
2
// 批量更新下注
func (impl *user) UpdateList(ctx context.Context, UserList []*user) error {
session := impl.NewTransactionSession(ctx)
err := session.Transaction(func(tx *gorm.DB) error {
for _, user := range UserList {
err := tx.Table("user").Where("uid = ? and round_id = ?", user.Uid, user.RoundId).Updates(map[string]any{
"name": user.xxx,
"age": user.xxx,
"xxx": user.xxx,
}).Error
if err != nil {
return err
}
}
return nil
})
if err != nil {
log.Errorf("UpdateList err:", err.Error())
return err
}
return nil
}
// 批量处理
func BatchDto() {
/*
1.批量保存500条,不足500条,超过10秒自动保存
2.处理关闭信号,在程序关闭时需要处理chan中剩余的任务
*/
ctx := context.Background()
batchSize := 500
var batch []*modelW.OdsDouyinVideo
signalChan := make(chan os.Signal, 1)
signal.Notify(signalChan, os.Interrupt, syscall.SIGTERM)
go func() {
s.log.Info("Program start")
// 等待程序关闭信号
<-signalChan
s.log.Info("Program closing")
for data := range chSave {
batch = append(batch, data)
}
err := s.saveBatch(ctx, batch)
if err != nil {
s.log.Errorf("error:%v", err)
}
batch = nil
return
}()
for {
select {
case data, ok := <-chSave:
if !ok {
continue
}
batch = append(batch, data)
if len(batch) >= batchSize {
err := s.saveBatch(ctx, batch)
if err != nil {
s.log.Errorf("error:%v", err)
}
batch = nil
}
case <-time.After(2 * time.Second):
if len(batch) > 0 {
err := s.saveBatch(ctx, batch)
if err != nil {
s.log.Errorf("error:%v", err)
}
batch = nil
}
}
}
}
1.4golang-发送邮箱
邮箱
//SendMailCode 邮箱-发送邮箱验证码
/**
* @receiver m
* @param ctx 上下文
* @param tx 事务
* @param mail 邮箱
* @param mailCode 邮箱验证码
* @return resData 结果集
* @return err 错误信息
*/
import (
"gopkg.in/gomail.v2"
"strconv"
)
func SendMailCode(mailSubject string, mail string, mailContent string) (resData bool, err error) {
// 设置邮箱主体
mailConn := map[string]string{
"user": "123@gmblgamefi.com",
"pass": "123",
"host": "123aliyun.com",
"port": "123",
}
port, _ := strconv.Atoi(mailConn["port"])
mailNew := gomail.NewMessage()
mailNew.SetHeader("From", mailNew.FormatAddress(mailConn["user"], "")) // 添加别名
mailNew.SetHeader("To", mail) // 发送给用户
mailNew.SetHeader("Subject", mailSubject) // 设置邮件主题
mailNew.SetBody("text/html", mailContent) // 设置邮件正文
d := gomail.NewDialer(mailConn["host"], port, mailConn["user"], mailConn["pass"]) // 设置邮件正文
err = d.DialAndSend(mailNew)
if err != nil {
panic(err)
}
return true, nil
}
1.5mysql工具
1.avg时:会忽略NULL值。如连表后的NULL值。
例如:10条数据,8条字段为NULL。那么avg时只会平均2条有值的。解决:其他为NULL值当作0处理,平均10条数据。
AVG(COALESCE(age,0)) as "AvgAge"
2.sql分组排序后取分组的前N个
使用ROW_NUMBER() OVER,排序编号
SELECT
*
FROM
( SELECT *, ROW_NUMBER() OVER ( PARTITION BY user_id ORDER BY updated_at DESC ) AS rn FROM user_table WHERE user_id IN ( 2,3,4 ) and status = 1 ) t
WHERE
t.rn = 1
3.gorm执行原生update的sql
sql:=`update xxx`
err = dbCtx.Exec(sql).Error
if err != nil {
a.log.WithContext(ctx).Error(err)
return err
}
4.分组累加之后,每行所占数值比例
UPDATE user_table AS t1
INNER JOIN (
SELECT date, type, open_id, SUM(money) AS total_money
FROM user_table
GROUP BY type, date, open_id
) AS t2
ON t1.open_id = t2.open_id
AND t1.date = t2.date
AND t1.type = t2.type
SET t1.distributions_ratio = (t1.money / t2.total_money)
WHERE t2.total_money>0
2.其他工具
2.1 in
func elementIsInSlice(element int, elements []int) (isIn bool) {
for _, item := range elements {
if element == item {
isIn = true
return
}
}
return
}
func elementIsInSlice2(element string, elements []string) (isIn bool) {
for _, item := range elements {
if element == item {
isIn = true
return
}
}
return
}
func main() {
li:=[]string{"a","b","c"}
if elementIsInSlice2("a",li){
fmt.Println("在里面")
}
}
2.2字符串反转
func main() {
fmt.Println(reverseString("abc"))
}
//字符串反转
func reverseString(str string) string {
strRune := []rune(str)
l := len(strRune)
for i := 0; i < l/2; i++ {
strRune[i], strRune[l-i-1] = strRune[l-i-1], strRune[i]
}
return string(strRune)
}
2.3随机数
func RandomInt(length int) (str string) {
var arr []byte = []byte{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
r := rand.New(rand.NewSource(time.Now().UnixNano()))
size := len(arr)
for i := 0; i < length; i++ {
str += string(arr[r.Intn(size)])
}
return
}
2.4万能格式字符串时间转时间戳 (支持10种格式)
func demo() {
str1 := "2022-1-1" //1.yyyy-m-d
str2 := "2022-01-01" //2.yyyy-mm-dd
str3 := "2022/01/01" //3:yyyy/mm/dd
str4 := "2022/1/1" //4:yyyy/m/d
str5 := "2022-1-1 00-00-00" //5:yyyy-mm-dd hh-mm-ss
str6 := "2022-1-1 00:00:00" //6:yyyy-mm-dd hh:mm:ss
str7 := "2022-1-1 00/00/00" //7:yyyy-mm-dd hh/mm/ss
str8 := "2022/1/1 00-00-00" //8:yyyy/mm/dd hh-mm-ss
str9 := "2022/1/1 00:00:00" //9:yyyy/mm/dd hh:mm:ss
str10 := "2022/1/1 00/00/00" //10:yyyy/mm/dd hh/mm/ss
fmt.Println(ParseTime(str10))
}
func ParseTime(strTime string) (int64, error) {
strTime = strings.TrimSpace(strTime)
s := strings.Split(strTime, " ")
layout := "2006-1-2 15-04-05" //默认格式
switch len(s) {
case 1: //年月日
if strings.Contains(strTime, "-") {
layout = "2006-1-2" //1,2:yyy-m-d,yyy-mm-dd
} else {
layout = "2006/1/2" //3,4:yyyy/mm/dd,yyyy/m/d
}
case 2: //年月日时分秒
if strings.Contains(s[0], "-") {
if strings.Contains(s[1], "-") {
layout = "2006-1-2 15-04-05" //5:yyyy-mm-dd hh-mm-ss
} else {
if strings.Contains(s[1], ":") {
layout = "2006-1-2 15:04:05" //6:yyyy-mm-dd hh:mm:ss
} else {
layout = "2006-1-2 15/04/05" //7:yyyy-mm-dd hh/mm/ss
}
}
} else {
if strings.Contains(s[1], "-") {
layout = "2006/1/2 15-04-05" //8:yyyy/mm/dd hh-mm-ss
} else {
if strings.Contains(s[1], ":") {
layout = "2006/1/2 15:04:05" //9:yyyy/mm/dd hh:mm:ss
} else {
layout = "2006/1/2 15/04/05" //10:yyyy/mm/dd hh/mm/ss
}
}
}
}
shipTsInt, err := time.ParseInLocation(layout, strTime, time.Local)
if err != nil {
fmt.Println("err:", err)
return 0, err
}
return shipTsInt.Unix(), nil
}
2.5过期map
package main
import (
"fmt"
"time"
)
type Cache struct {
data map[string]interface{}
expireAt map[string]time.Time
}
func (c *Cache) Set(key string, value interface{}, expire time.Duration) {
c.data[key] = value
c.expireAt[key] = time.Now().Add(expire)
}
func (c *Cache) Get(key string) (interface{}, bool) {
expireTime, ok := c.expireAt[key]
if !ok || time.Now().After(expireTime) {
delete(c.data, key)
delete(c.expireAt, key)
return nil, false
}
return c.data[key], true
}
func main() {
cache := &Cache{
data: make(map[string]interface{}),
expireAt: make(map[string]time.Time),
}
// 设置键值对的过期时间为1秒
cache.Set("name", "123456", time.Second)
for {
value, exist := cache.Get("name")
if exist {
fmt.Println(value)
} else {
fmt.Println("Key has expired or does not exist.")
break
}
time.Sleep(time.Millisecond)
}
}
2.6追加写文件
//写文件
func Ioutil_write(data string) {
filePath := "./a.txt"
// 打开文件,如果文件不存在则创建
file, err := os.OpenFile(filePath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
if err != nil {
fmt.Println(err)
return
}
defer file.Close()
// 将文件指针定位到文件末尾
_, err = file.Seek(0, io.SeekEnd)
if err != nil {
fmt.Println(err)
return
}
// 将内容写入文件
_, err = io.WriteString(file, data)
if err != nil {
fmt.Println(err)
return
}
}
2.7两个切片合并去重
func mergeAndDeduplicate(s1, s2 []int) []int {
merged := append(s1, s2...)
seen := make(map[int]bool)
result := []int{}
for _, v := range merged {
if _, ok := seen[v]; !ok {
seen[v] = true
result = append(result, v)
}
}
return result
}
2.8字符串长度
使用utf8.RuneCountInString函数来计算字符串的长度,其中每个字符都被视为一个Unicode码点。
func main() {
str := "123"
length := utf8.RuneCountInString(str)
fmt.Println("字符串长度:", length)
}
3.excel操作
tealeg/xlsx包操作
"github.com/tealeg/xlsx"
// 创建一个新的Excel文件
file := xlsx.NewFile()
// 创建一个新的工作表
sheet, err := file.AddSheet("Sheet1")
if err != nil {
log.Fatal(err)
}
// 设置第一列的宽度为20个字符宽度单位
sheet.SetColWidth(0, 20)
// 创建一个新的单元格,赋值为"Hello, world!"
cell := sheet.Cell(1, 1)
cell.SetValue("Hello, world!")
// 保存Excel文件
err = file.Save("example.xlsx")
if err != nil {
log.Fatal(err)
}
//样式设置
样式设置
//左对齐
left_align := *xlsx.DefaultAlignment()
left_align.Horizontal = "left"
//居中
center_H_align := *xlsx.DefaultAlignment()
center_H_align.Horizontal = "center"
//右对齐
right_align := *xlsx.DefaultAlignment()
right_align.Horizontal = "right"
//标题字体样式设置
border := *xlsx.NewBorder("thin", "thin", "thin", "thin")
title_style := xlsx.NewStyle()
font := *xlsx.NewFont(18, "SimSun")
font.Bold = true
title_style.Font = font
title_style.Alignment = center_H_align
title_style.ApplyAlignment = true
title_style.Border = border
title_style.ApplyBorder = true
//内容字体样式设置
text_style := xlsx.NewStyle()
font = *xlsx.NewFont(20, "宋体")
text_style.Font = font
text_style.Alignment = center_H_align
text_style.ApplyAlignment = true
text_style.Border = border
text_style.ApplyBorder = true
//应用配置
cell.SetStyle(title_style)
读
package main
import (
"fmt"
"github.com/tealeg/xlsx"
)
func main() {
excelFileName := "test.xlsx"
xlFile, err := xlsx.OpenFile(excelFileName) //所有的sheet
if err != nil {
fmt.Println("err:", err)
return
}
fmt.Println(xlFile.Sheets[0])//第0个sheet
for _, sheet := range xlFile.Sheets {
fmt.Println(sheet.Name)
for _, row := range sheet.Rows {
//row行
for _, cell := range row.Cells {
//行里面的列
text := cell.String()
fmt.Printf("%s", text)
}
}
}
}
读-封装
package main
import (
"encoding/json"
"fmt"
"github.com/tealeg/xlsx"
)
type User struct {
Name string `json:"name"`
Age string `json:"age"`
}
func main() {
excelFileName := "1.xlsx"
data, err := ReadToXlsx(excelFileName)
if err != nil {
fmt.Println(err)
return
}
b, _ := json.Marshal(data)
u := []User{}
_ = json.Unmarshal(b, &u)
fmt.Println(u)
}
func ReadToXlsx(tablename string) ([]map[string]interface{}, error) {
xlFile, err := xlsx.OpenFile(tablename) //所有的sheet
if err != nil {
return nil, err
}
titleMap := map[int]string{}
titleRow := xlFile.Sheets[0].Rows[0].Cells
for k, titlename := range titleRow {
titleMap[k] = titlename.String()
}
T := []map[string]interface{}{}
for _, sheet := range xlFile.Sheets {
//sheet表
for i := 1; i < len(sheet.Rows); i++ {
//表中的行
t := map[string]interface{}{}
for k, v := range sheet.Rows[i].Cells {
//行中的列
t[titleMap[k]] = v.Value
}
T = append(T, t)
}
}
return T, nil
}
写
func WriteToXlsx(list []Res) {
file := xlsx.NewFile()
sheet, err := file.AddSheet("Sheet1")
if err != nil {
fmt.Printf(err.Error())
return
}
row := sheet.AddRow()
cell := row.AddCell()
cell.Value = "标题1"
cell = row.AddCell()
cell.Value = "标题2"
cell = row.AddCell()
cell.Value = "标题3"
nextRow := sheet.AddRow()
cell = nextRow.AddCell()
cell.Value = "内容1"
cell = nextRow.AddCell()
cell.Value = "内容2"
cell = nextRow.AddCell()
cell.Value = "内容3"
file.Save("./1.xlsx")
}
写封装1-切片结构体转excel-直接保存
import (
"fmt"
"github.com/tealeg/xlsx"
"reflect"
)
type User struct {
Name string `json:"name"`
Age int `json:"age"`
}
func main() {
jeff := User{
Name: "jeff",
Age: 18,
}
chary := User{
Name: "chary",
Age: 20,
}
list := []interface{}{jeff, chary}
if err := WriteToXlsx(list, "2", "Sheet1"); err != nil {
fmt.Println(err)
}
}
func WriteToXlsx(T []interface{}, tableName, sheetName string) error {
t := reflect.TypeOf(T[0])
titleCount := t.NumField()
titleStrList := []string{}
for i := 0; i < titleCount; i++ {
titleStrList = append(titleStrList, t.Field(i).Name)
}
file := xlsx.NewFile()
sheet, err := file.AddSheet(sheetName)
if err != nil {
return err
}
row := sheet.AddRow()
cell := row.AddCell()
for _, titleName := range titleStrList {
cell.Value = titleName
cell = row.AddCell()
}
for i := 0; i < len(T); i++ {
r := T[i]
nextRow := sheet.AddRow()
s := reflect.ValueOf(r)
for j := 0; j < titleCount; j++ {
cell = nextRow.AddCell()
cell.Value = fmt.Sprintf("%v", s.Field(j))
}
}
if err = file.Save(fmt.Sprintf("%s.xlsx", tableName)); err != nil {
return err
}
return nil
}
写封装2-切片结构体转excel-返回文件句柄
import (
"fmt"
"github.com/tealeg/xlsx"
"reflect"
)
type User struct {
Name string `json:"name"`
Age int `json:"age"`
}
func main() {
jeff := User{
Name: "jeff",
Age: 18,
}
chary := User{
Name: "chary",
Age: 20,
}
list := []interface{}{jeff, chary}
file, err := WriteToXlsx2(list, "Sheet1")
fmt.Println(err)
fmt.Println(file)
}
func WriteToXlsx2(T []interface{}, sheetName string) (file *xlsx.File, err error) {
t := reflect.TypeOf(T[0])
titleCount := t.NumField()
titleStrList := []string{}
for i := 0; i < titleCount; i++ {
titleStrList = append(titleStrList, t.Field(i).Name)
}
file = xlsx.NewFile()
sheet, err := file.AddSheet(sheetName)
if err != nil {
return
}
row := sheet.AddRow()
cell := row.AddCell()
for _, titleName := range titleStrList {
cell.Value = titleName
cell = row.AddCell()
}
for i := 0; i < len(T); i++ {
r := T[i]
nextRow := sheet.AddRow()
s := reflect.ValueOf(r)
for j := 0; j < titleCount; j++ {
cell = nextRow.AddCell()
cell.Value = fmt.Sprintf("%v", s.Field(j))
}
}
return
}
写封装3-切片结构体转excel(带excel样式)-直接保存
func WriteToXlsx2(T []interface{}, tableName, sheetName string) error {
t := reflect.TypeOf(T[0])
titleCount := t.NumField()
type k struct {
Column string
Desc string
Width int
}
titleMap := map[string]k{}
for i := 0; i < titleCount; i++ {
s := t.Field(i).Tag.Get("excel")
st := strings.Split(s, ";")
if len(st) <= 1 {
return errors.New("检查结构体!!!")
}
width, _ := strconv.Atoi(strings.Split(st[2], ":")[1])
column := strings.Split(st[0], ":")[1]
titleMap[column] = k{
Column: strings.Split(st[0], ":")[1],
Desc: strings.Split(st[1], ":")[1],
Width: width,
}
}
titleColumnList := []string{}
maxAscii := 'A'
for column := range titleMap {
columnAscii := []rune(column)[0]
if columnAscii > maxAscii && columnAscii <= 'Z' {
maxAscii = columnAscii
}
}
for i := 'A'; i <= maxAscii; i++ {
titleColumnList = append(titleColumnList, fmt.Sprintf("%c", i))
}
file := xlsx.NewFile()
sheet, err := file.AddSheet(sheetName)
if err != nil {
return err
}
row := sheet.AddRow()
cell := row.AddCell()
nilMap := map[int]bool{}
for index, titleTypeName := range titleColumnList {
ks := titleMap[titleTypeName]
if ks.Column == "" {
nilMap[index] = true
}
sheet.SetColWidth(index, index, float64(ks.Width))
cell.Value = ks.Desc
cell = row.AddCell()
}
for i := 0; i < len(T); i++ {
r := T[i]
nextRow := sheet.AddRow()
s := reflect.ValueOf(r)
c := 0
for j := 0; j < len(titleColumnList); j++ {
if nilMap[j] == true {
cell = nextRow.AddCell()
} else {
cell = nextRow.AddCell()
value := s.Field(c)
cell.Value = fmt.Sprintf("%v", value)
c++
}
}
}
if err = file.Save(fmt.Sprintf("%s.xlsx", tableName)); err != nil {
return err
}
return nil
}
func main() {
jeff := User{
Name: "jeff",
Age: 18,
}
chary := User{
Name: "chary",
Age: 20,
}
list := []interface{}{jeff, chary}
if err := WriteToXlsx2(list, "3", "Sheet1"); err != nil {
fmt.Println(err)
}
}
写封装4-切片结构体转excel(带excel样式)-返回文件句柄
func WriteToXlsx(T []interface{}, sheetName string) (file *xlsx.File, err error) {
t := reflect.TypeOf(T[0])
titleCount := t.NumField()
type k struct {
Column string
Desc string
Width int
}
titleMap := map[string]k{}
for i := 0; i < titleCount; i++ {
s := t.Field(i).Tag.Get("excel")
st := strings.Split(s, ";")
if len(st) <= 1 {
return nil, errors.New("检查结构体!!!")
}
width, _ := strconv.Atoi(strings.Split(st[2], ":")[1])
column := strings.Split(st[0], ":")[1]
titleMap[column] = k{
Column: strings.Split(st[0], ":")[1],
Desc: strings.Split(st[1], ":")[1],
Width: width,
}
}
titleColumnList := []string{}
maxAscii := 'A'
for column := range titleMap {
columnAscii := []rune(column)[0]
if columnAscii > maxAscii && columnAscii <= 'Z' {
maxAscii = columnAscii
}
}
for i := 'A'; i <= maxAscii; i++ {
titleColumnList = append(titleColumnList, fmt.Sprintf("%c", i))
}
file = xlsx.NewFile()
sheet, err := file.AddSheet(sheetName)
if err != nil {
return nil, err
}
row := sheet.AddRow()
cell := row.AddCell()
nilMap := map[int]bool{}
for index, titleTypeName := range titleColumnList {
ks := titleMap[titleTypeName]
if ks.Column == "" {
nilMap[index] = true
}
sheet.SetColWidth(index, index, float64(ks.Width))
cell.Value = ks.Desc
cell = row.AddCell()
}
for i := 0; i < len(T); i++ {
r := T[i]
nextRow := sheet.AddRow()
s := reflect.ValueOf(r)
c := 0
for j := 0; j < len(titleColumnList); j++ {
if nilMap[j] == true {
cell = nextRow.AddCell()
} else {
cell = nextRow.AddCell()
value := s.Field(c)
cell.Value = fmt.Sprintf("%v", value)
c++
}
}
}
return file, nil
}
func main() {
jeff := User{
Name: "jeff",
Age: 18,
}
chary := User{
Name: "chary",
Age: 20,
}
list := []interface{}{jeff, chary}
file, _ := WriteToXlsx(list, "Sheet1")
fmt.Println(file)
}
context读取excel,封装
import (
"fmt"
"path"
"strconv"
"github.com/gin-gonic/gin"
"github.com/guyouyin123/tools/qtable"
)
func getFileExcelData(c *gin.Context) (shipType int, data []map[string]string, fileName string, err error) {
shipTypeStr := c.PostForm("shipType")
shipType, err = strconv.Atoi(shipTypeStr)
if err != nil {
err = fmt.Errorf("ship_type类型必须为int型")
fmt.Println(err)
return 0, nil, "", err
}
mf, err := c.FormFile("file")
if err != nil {
fmt.Println(err)
return 0, nil, "", err
}
fh, err := mf.Open()
if err != nil {
fmt.Println(err)
return 0, nil, "", err
}
defer fh.Close()
table, err := qtable.TableRead(path.Ext(mf.Filename))
if err != nil {
fmt.Println(err)
return 0, nil, "", err
}
data, err = table.Read(fh)
if len(data) == 0 {
err = fmt.Errorf("未读取到数据,请检查数据")
fmt.Println(err)
return 0, nil, "", err
}
fileName = mf.Filename
return shipType, data, fileName, nil
}
写入csv文件
func writeCsv() {
file, err := os.Create("data.csv")
if err != nil {
panic(err)
}
defer file.Close()
// 创建 CSV 写入器
writer := csv.NewWriter(file)
defer writer.Flush()
// 写入表头数据
header := []string{"Name", "Age", "Gender"}
writer.Write(header)
// 写入数据行
data := [][]string{
{"Alice", "18", "Female"},
{"Bob", "22", "Male"},
{"Cathy", "27", "Female"},
}
for _, row := range data {
writer.Write(row)
}
}
4.input输入
func input(prompt string) string {
var text string
fmt.Print(prompt)
fmt.Scan(&text)
return text
}
func main() {
a := input("你输入的名称:")
b := input("你输入的性别:")
c := input("你输入的年龄:")
fmt.Printf("%v, %v, %v", a, b, c)
}
5.RsplitN右切
// RsplitN 切割字符串,从右边开始切割指定次数,并保留切割字符
func RsplitN(s string, delimiter string, cuts int) []string {
if cuts == 0 {
return []string{s}
}
// 从右向左切割字符串
parts := strings.Split(s, delimiter)
lenth := len(parts)
// 如果切割次数超过了部分数量,限制为部分数量
if cuts > lenth {
cuts = len(parts)
}
tmp := lenth - cuts
// 反转切割后的部分以从右边开始取
result := make([]string, 0)
st := ""
for k, v := range parts {
if k < tmp {
st += v + delimiter
} else {
if st != "" {
result = append(result, st)
st = ""
}
result = append(result, v)
}
}
return result
}
6.web展示图片
web展示图片--图片链接
package main
import (
"fmt"
"log"
"net/http"
)
func main() {
imgList := []string{"https://123.png", "https://456.png", "https://789.png"}
// 注册 HTTP 处理函数
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
// 设置响应头
w.Header().Set("Content-Type", "text/html")
imgStr := "<img src=\"%s\">"
img := ""
for _, v := range imgList {
img += fmt.Sprintf(imgStr, v)
}
// 编写 HTML 页面
fmt.Fprintf(w, `
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Image Demo</title>
</head>
<body>
<h1>图片素材</h1>
%s
</body>
</html>
`, img)
})
// 启动 Web 服务器
log.Println("Server listening on :8080")
err := http.ListenAndServe(":8080", nil)
if err != nil {
log.Fatal(err)
}
}
web展示图片--图片文件
import (
"encoding/base64"
"fmt"
"io/ioutil"
"log"
"net/http"
)
func main() {
// 读取图片文件
imageFile, err := ioutil.ReadFile("/test/123.jpeg")
if err != nil {
log.Fatal(err)
}
// 注册 HTTP 处理函数
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
// 设置响应头
w.Header().Set("Content-Type", "text/html")
// 编写 HTML 页面
fmt.Fprintf(w, `
<!DOCTYPE html>
<html>
<head>
<title>图片素材</title>
</head>
<body>
<h1>图片素材</h1>
<img src="data:image/jpeg;base64,%s">
</body>
</html>
`, base64.StdEncoding.EncodeToString(imageFile))
})
// 启动 Web 服务器
log.Println("Server listening on :8080")
err = http.ListenAndServe(":8080", nil)
if err != nil {
log.Fatal(err)
}
}
7.读取yaml配置文件
user.yaml
name: "jeff"
age: 18
other:
sex: "男"
like: "篮球"
方法 1--github.com/xbitgo/core/config
package main
import (
"fmt"
"github.com/xbitgo/core/config"
)
type Config struct {
Name string `yaml:"name"`
Age int `yaml:"age"`
Other *Other `yaml:"other"`
}
type Other struct {
Sex string `yaml:"sex"`
Like string `yaml:"like"`
}
func main() {
var Global = &Config{}
cfg := config.Yaml{ConfigFile: "user.yaml"}
err := cfg.Apply(Global)
if err != nil {
fmt.Println("err:", err.Error())
}
fmt.Println(Global)
fmt.Println(Global.Other)
}
方法 2--viper
func main() {
viper.SetConfigFile("./user.yaml")
err := viper.ReadInConfig() // 查找并读取配置文件
if err != nil { // 处理读取配置文件的错误
panic(fmt.Errorf("Fatal error config file: %s \n", err))
}
fmt.Println(viper.GetString("name"))
}
方法 3--go-zero/core/conf包
package main
import (
"flag"
"fmt"
"github.com/zeromicro/go-zero/core/conf"
)
type Config struct {
Name string
Age int
Other Other
}
type Other struct {
Sex string
Like string
}
func main() {
var configFile = flag.String("f", "./golang/user.yaml", "the config file")
var c Config
MustLoad(*configFile, &c)
fmt.Println(c)
}
func MustLoad(arg string, rs interface{}) {
conf.MustLoad(arg, rs)
}
8.mysql bin日志解析
//这将显示数据目录的位置,binlog文件通常位于该目录下
SHOW VARIABLES LIKE 'datadir';
package main
import (
"fmt"
"github.com/go-mysql-org/go-mysql/replication"
"time"
)
func main() {
fileName := "./mysql-bin.001234"
ParseBinlog(fileName)
}
func ParseBinlog(binlogFile string) {
binlogParser := replication.NewBinlogParser()
err := binlogParser.ParseFile(binlogFile, 0, func(event *replication.BinlogEvent) error {
switch e := event.Event.(type) {
case *replication.RowsEvent:
switch event.Header.EventType {
case replication.DELETE_ROWS_EVENTv0, replication.DELETE_ROWS_EVENTv1, replication.DELETE_ROWS_EVENTv2:
//delete事件数据 一次批量过多,rows一条记录中有多条,需要解开
rows := e.Rows[0]
fmt.Println(rows) //发送chan处理,(写进表或者其他)
time.Sleep(time.Second * 3)
case replication.UPDATE_ROWS_EVENTv0, replication.UPDATE_ROWS_EVENTv1, replication.UPDATE_ROWS_EVENTv2:
//update事件数据
rows := e.Rows[0]
fmt.Println(rows) //发送chan处理,(写进表或者其他)
time.Sleep(time.Second * 3)
case replication.WRITE_ROWS_EVENTv0, replication.WRITE_ROWS_EVENTv1, replication.WRITE_ROWS_EVENTv2:
//instert事件
rows := e.Rows[0]
fmt.Println(rows) //发送chan处理,(写进表或者其他)
time.Sleep(time.Second * 3)
}
}
return nil
},
)
fmt.Println(err)
}
9.限频工具
//api限流
func ApiPeriodLimit(uidStr, url string, limitNumberDay, exSecond int) error {
/*
uidStr:可以填写uid,没有uid可以填写手机号
url:路由
exSecond:访问频率,单位秒
limitNumber:访问次数限制,单位天
注:访问数据保留 7 天自动删除
*/
/*
redis的key:
上次请求记录string--last:{{uid}}:{{url}} 过期时间=exSecond
日次数hash--apiLimitHash:{{url}}:{{day}} key=uid,value=次数
日黑名单hash--backApi:{{url}}:{{day}} key=uid,value=1
1.日黑名单校验
2.setnx成功就继续
3.每次成功请求日次数+1
4.日次数超过限制,加入日黑名单
*/
rdb := config.GetRedis()
now := time.Now()
day := now.Format("2006-01-02")
t, _ := time.ParseInLocation("2006-01-02", day, time.Local)
exDay := t.AddDate(0, 0, 7) //过期时间7天后
//1.日黑名单校验
backApi := fmt.Sprintf("backApi:%s:%s", url, day)
v, _ := rdb.Hget(backApi, uidStr)
if v != "" {
return common.OperateError("今日已超过限制")
}
//2.setnx成功就继续
exKey := fmt.Sprintf("last:%s:%s", uidStr, url)
boo, _ := rdb.SetnxEx(exKey, "1", exSecond)
if !boo {
return common.OperateError("操作频繁 请稍候再试")
}
//3.每次成功请求日次数+1,日次数超过限制,加入日黑名单
apiLimitHash := fmt.Sprintf("apiLimitHash:%s:%s", url, day)
countStr, _ := rdb.Hget(apiLimitHash, uidStr)
count, _ := strconv.Atoi(countStr)
count += 1
rdb.Hset(apiLimitHash, uidStr, fmt.Sprintf("%d", count))
rdb.Expireat(apiLimitHash, exDay.Unix())
//4.日次数超过限制,加入日黑名单
if count >= limitNumberDay {
rdb.Hset(backApi, uidStr, "1")
rdb.Expireat(backApi, exDay.Unix())
}
return nil
}
10.频率统计
//频率统计
func ApiRate2(uid int64, url string) error {
rdb := config.GetRedis()
userID := fmt.Sprintf("rate:%s:%d", url, uid)
// 获取当前时间
now := time.Now().UnixNano()
// 生成当前时间窗口的起始时间和结束时间
windowStart := (now) / 1000
windowEnd := windowStart - 300*(1000000)
// 移除过期的窗口数据
rdb.Zremrangebyscore(userID, 0, windowEnd)
// 增加当前请求的分数,并设置过期时间
rdb.Zincrby(userID, windowStart, fmt.Sprintf("%d", now))
rdb.Expire(userID, 300)
// 获取窗口内的请求次数
requestCount, err := rdb.Zcount(userID, windowEnd, windowStart)
if err != nil {
// 错误处理
return nil
}
// 打印用户每分钟的请求次数
// 保存次数
backHash := fmt.Sprintf("backHash:%s", url)
err = rdb.Hset(backHash, fmt.Sprintf("%d", uid), fmt.Sprintf("%d_%d", requestCount, time.Now().Unix()))
if err != nil {
return nil
}
// 如果请求次数超过阈值,则拒绝访问
if requestCount >= 100 {
key := fmt.Sprintf("%s:%d", limitBlackKey, uid)
msg := "外挂"
config.GetRedisLimit().Setex(key, msg, 2*3600)
return msg)\
}
return nil
}
11.golang执行lua脚本操作redis
package main
import (
"context"
"fmt"
"github.com/go-redis/redis/v8"
"time"
)
func start() {
// 创建 Redis 客户端
client := redis.NewClient(&redis.Options{
Addr: "localhost:6379", // Redis 服务器地址
Password: "", // Redis 密码
DB: 0, // Redis 数据库索引
})
// 加锁
key := "mylock" // 锁的键名
value := "myvalue" // 锁的值
expiration := 10 * time.Second // 锁的过期时间
success, err := acquireLock(client, key, value, expiration)
if err != nil {
fmt.Println("加锁失败:", err)
return
}
if success {
fmt.Println("加锁成功")
// 执行加锁成功后的操作
// ...
// 解锁
err := releaseLock(client, key)
if err != nil {
fmt.Println("解锁失败:", err)
} else {
fmt.Println("解锁成功")
}
} else {
fmt.Println("加锁失败")
}
}
func acquireLock(client *redis.Client, key string, value string, expiration time.Duration) (bool, error) {
script := `
local result = redis.call('SET', KEYS[1], ARGV[1], 'NX', 'PX', ARGV[2])
if result then
return true
else
return false
end
`
result, err := client.Eval(context.Background(), script, []string{key}, value, expiration).Bool()
if err != nil {
return false, err
}
return result, nil
}
func releaseLock(client *redis.Client, key string) error {
_, err := client.Del(context.Background(), key).Result()
return err
}
func main() {
start()
}
12.golang执行lua脚本
package main
import (
"fmt"
"gopkg.in/koyhoge/lua.v3"
)
func main() {
// 创建Lua状态机
l := lua.NewState()
defer l.Close()
// 加载Lua脚本
script := `print("Hello from Lua!")`
// 执行Lua脚本
err := l.DoString(script)
if err != nil {
fmt.Println("执行脚本时出错:", err)
return
}
}
13.执行sql
import (
"database/sql"
"encoding/json"
"fmt"
_ "github.com/go-sql-driver/mysql"
"github.com/tealeg/xlsx"
)
var DB *sql.DB //全局对象db
// 测试环境直连
func InitDB() (err error) {
DB, err = sql.Open("mysql", "root:123456@tcp(127.0.0.1:3306)/myself?charset=utf8mb4&interpolateParams=true&parseTime=true&loc=Local")
if err != nil {
return err
}
err = DB.Ping() //校验数据库连接
if err != nil {
return err
}
return nil
}
func Insert(i *Data) (r int) {
sqlstr := `insert into user(name,age,email)
values('%v','%v','%v','%v','%v','%v')`
sqlstr = fmt.Sprintf(sqlstr, i.RealName, i.Age, i.Email)
err := DB.QueryRow(sqlstr).Scan(&r)
if err != nil {
fmt.Println(err)
return
}
return
}
14.oss上传工具
var OssClient *AliOSS
func InitOss() {
AccessKeyId := "AccessKeyId"
AccessKeySecret := "AccessKeySecret"
Endpoint := "http://oss-cn-shanghai.aliyuncs.com"
client, err := NewAliOSS(AliConfig{
AccessKeyId: AccessKeyId,
AccessKeySecret: AccessKeySecret,
Endpoint: Endpoint,
})
if err != nil {
panic(err)
}
OssClient = client
}
func PutObjectBase64(ctx context.Context, base64Str string, imgName string) (string, error) {
imgName = fmt.Sprintf("images/%s", imgName)
// 获取存储空间(Bucket)对象
bucket, err := OssClient.client.Bucket(bucketName)
if err != nil {
fmt.Println("Error getting bucket:", err)
return "", nil
}
sDec, err := base64.StdEncoding.DecodeString(base64Str)
reader := bytes.NewReader(sDec)
err = bucket.PutObject(imgName, reader)
if err != nil {
fmt.Println("Error uploading file:", err)
return "", nil
}
url := "https://img.xoxox.com/"
url += imgName
return url, nil
}
15.Redis 的 WATCH 命令来实现乐观锁
Redis的WATCH命令本身并不是一个原子操作。它仅用于在执行事务期间监视指定的键,以便在事务执行期间检测到键是否被修改。WATCH命令的作用是在执行EXEC命令之前,对被监视的键进行检查,如果在WATCH命令执行后,到EXEC命令执行期间,被监视的键被其他客户端修改了,那么事务将被中断。
package main
import (
"fmt"
"log"
"github.com/go-redis/redis"
)
func main() {
// 创建 Redis 客户端
client := redis.NewClient(&redis.Options{
Addr: "localhost:6379",
Password: "", // 如果有密码,填写密码
DB: 0, // 选择数据库
})
// 监视库存的键
err := client.Watch(func(tx *redis.Tx) error {
// 获取当前库存余额
val, err := tx.Get("stock").Result()
if err != nil {
return err
}
stock, _ := strconv.Atoi(val)
// 检查库存是否充足
if stock > 0 {
// 执行更新库存的事务
_, err = tx.TxPipelined(func(pipe redis.Pipeliner) error {
pipe.Decr("stock")
return nil
})
if err != nil {
return err
}
fmt.Println("库存减少成功!")
} else {
fmt.Println("库存不足!")
}
return nil
}, "stock") // 监视库存的键
if err != nil {
log.Fatal(err)
}
}
16.优雅关闭处理
func main() {
//写入关闭信号
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
// 监听关闭信号
go func() {
sig := <-sigs
fmt.Println("接收到关闭信号,处理逻辑:", sig)
// 执行优雅关闭操作
}()
select {}
}
python工具
1.excel操作工具
1.1excel转列表
"""
wx_wutu_readnum.xls表
第一列:project_id
第二列:read_num
第三列:like_num
"""
def get_id_excel():
xlsfile = r"./wx_wutu_readnum.xls" # 打开指定路径中的xls文件
file = xlrd.open_workbook(xlsfile) # 得到Excel文件的book对象,实例化对象
# 获取sheet
sheet0 = file.sheets()[0] # 通过sheet索引顺序获取
project_id = sheet0.col_values(0) # 读取第1列
read_num = sheet0.col_values(1) # 读取第2列
like_num = sheet0.col_values(2) # 读取第3列
lis = list(zip(project_id, read_num, like_num))
return lis
1.2.列表写入excel
def index(sql_list, monitor_dic):
import xlwt
file = xlwt.Workbook(encoding='utf-8', style_compression=0)
name = '2020上半年头条阅读为0'
# 一张表生成不同的sheet
sheet0 = file.add_sheet(name, cell_overwrite_ok=True) # 产生表
sheet0.write(0, 0, 'reserve_id')
sheet0.write(0, 1, 'project_id')
sheet0.write(0, 2, 'begin_time')
sheet0.write(0, 3, 'end_time')
sheet0.write(0, 4, 'media_id')
sheet0.write(0, 5, 'media_name')
sheet0.write(0, 6, 'post_id')
sheet0.write(0, 7, 'post_title')
sheet0.write(0, 8, 'view_count')
sheet0.write(0, 9, 'comment_count')
for index, test in enumerate(sql_list):
reserve_id = test[23]
begin_time = str(test[4])
end_time = str(test[5])
media_id = test[8]
media_name = test[9]
post_id = test[11]
post_title = test[12]
project_id = test[1]
sheet0.write(index + 1, 0, reserve_id)
sheet0.write(index + 1, 1, project_id)
sheet0.write(index + 1, 2, begin_time)
sheet0.write(index + 1, 3, end_time)
sheet0.write(index + 1, 4, media_id)
sheet0.write(index + 1, 5, media_name)
sheet0.write(index + 1, 6, post_id)
sheet0.write(index + 1, 7, post_title)
sheet0.write(index + 1, 8, monitor_dic.get(post_id)[0])
sheet0.write(index + 1, 9, monitor_dic.get(post_id)[1])
file.save('2020上半年.xls')
1.3csv转insert(sql)
def get_csv_file(file_path):
dic_list = []
with open(file_path, 'r') as f:
key_list = f.readline().replace('"', "").replace("\n", "").split(",")
while True:
dic = {}
line = f.readline().replace('"', "")
if len(line) < 1:
break
line_list = line.split(",")
for k, v in enumerate(line_list):
dic[key_list[k]] = v.replace("\n", "")
dic_list.append(dic)
return dic_list
def convert_insert_sql(dic_list, table_name):
with open('sql.txt', 'w') as f:
for dic in dic_list:
keys = ""
values = ""
for k in dic:
v = dic.get(k)
if v != "NULL":
try:
v = int(v)
values += f'{v},'
except Exception as e:
values += f"'{v}',"
else:
values += f'{v},'
keys += f"{k},"
keys = keys.rstrip(",")
values = values.rstrip(",")
sql = f"insert into {table_name}({keys}) values ({values});\n"
f.write(sql)
if __name__ == '__main__':
table_name = "users"
file_path = "/Users/jeff/Desktop/file_test/工具/data.csv"
li_list = get_csv_file(file_path)
convert_insert_sql(li_list, table_name)
1.4读取excel,csv
import csv
import xlrd
def get_csv():
with open('./name.csv', 'r') as f:
reader = csv.reader(f)
rows = list(reader)
dic = {}
for row in rows[1:len(rows)]:
dic[row[0]]=row[4]
return dic
def get_excel():
xlsfile = r"./name.xlsx"
file = xlrd.open_workbook(xlsfile)
sheet0 = file.sheets()[0]
nrows = sheet0.nrows
li_list = []
for i in range(1, nrows):
row = sheet0.row_values(i)
li_list.append(int(row[2]))
return li_list
def main():
dic = get_csv()
list = get_excel()
with open(r"./new.csv","w") as f:
for no in list:
last_time = dic.get(str(no))
f.write(f"{no},{last_time}\n")
if __name__ == '__main__':
main()
2.文件操作
2.1前后加引号,where in
def main():
with open(r"./2.txt","w") as k: #新文件
with open("./1.txt") as f: #源文件
while True:
v = f.readline().strip()
print(v)
if len(v)<1:
return
k.write(f"'{v}',\n")
3.时间操作
3.1时间范围
# 时间范围
def get_time():
now = time.time() # 当前时间戳
# now = now - 30 * 24 * 60 * 60
# mon = time.localtime(now)[1]
# day = time.localtime(now)[2]
# year = time.localtime(now)[0]
# end_time = "%s-%s-%s" % (year, mon, day)
end_time = convert_data_time(now)
twl = now - 3 * 24 * 60 * 60 # 30天前的时间戳
# mon1 = time.localtime(twl)[1] # 从30天前时间结构体中提取月
# day1 = time.localtime(twl)[2] # 从30天前时间结构体中提取日
# year1 = time.localtime(twl)[0]
# start_time = "%s-%s-%s" % (year1, mon1, day1)
start_time = convert_data_time(twl)
return [start_time, end_time]
3.2字符串标准时间转-时间戳
# 转换时间戳
def timestamp(convert_time):
timeArray = time.strptime(convert_time, "%Y-%m-%d %H:%M:%S")
timestamp = int(time.mktime(timeArray))
return timestamp
3.3时间戳转转-标准时间
# 时间戳转换标准时间
def convert_data_time(timeStamp):
timeArray = time.localtime(timeStamp)
data_time = time.strftime("%Y-%m-%d %H:%M:%S", timeArray)
return data_time
datetime.datetime.fromtimestamp(时间戳) # 时间戳转换为datime格式
4.队列工具
4.1.内存队列
# 获取mid队列
def get_mid_queue():
conn = psycopg2.connect()
cur = conn.cursor()
sql = 'select mid from bevoldb.beautiful_shop where id between 80000 and 90000'
# cur.execute(sql, ['39600'])
cur.execute(sql)
mid_list = cur.fetchall()
mid_queue = Queue()
for mid in mid_list:
mid1 = mid[0]
mid_queue.put(mid1)
return mid_queue
4.2.多线程配合内存队列
# 新的评论全自动入口
def start_commnet():
conn = psycopg2.connect()
cur = conn.cursor()
for i in range(1, 100):
a = (i-1)*10000
b = i*10000
sql = 'select mid from bevoldb.beautiful_shop where id between {} and {}'.format(a, b)
cur.execute(sql)
mid_list = cur.fetchall()
mid_queue = Queue()
for mid in mid_list:
mid1 = mid[0]
mid_queue.put(mid1)
for i in range(64):
save_img = Thread(target=get_commnet, args=(mid_queue,))
save_img.start()
4.3pika操作MQ(生产者)
def toutiao_win():
credentials = pika.PlainCredentials('用户名, '密码') # mq用户名和密码
# 虚拟队列需要指定参数 virtual_host,如果是默认的可以不填。
connection = pika.BlockingConnection(
pika.ConnectionParameters(host='地址', port=端口, virtual_host='/', credentials=credentials))
channel = connection.channel()
toutiao_ttcid_scid_token_list, toutiao_p_callback_list, ua_list = get_cookie_list()
# 声明消息队列,消息将在这个队列传递,如不存在,则创建
# result = channel.queue_declare(queue='队列名'
res1 = rds_16_8.sunion("xx","xx")
a = 0
for user_id in res1:
data = ""
a += 1
user_id = str(user_id.decode())
url = ""
data['url'] = url
# print(data)
# if a <= 30000:
# 向队列插入数值 routing_key是队列名
channel.basic_publish(exchange='', routing_key='队列名', body=json.dumps(data))
connection.close()
4.4pika操作MQ(消费者)
def xiaofei_toutiao():
credentials = pika.PlainCredentials('用户名', '密码') # mq用户名和密码
connection = pika.BlockingConnection(
pika.ConnectionParameters(host='地址', port=端口, virtual_host='/', credentials=credentials))
channel = connection.channel()
# 申明消息队列,消息在这个队列传递,如果不存在,则创建队列
# channel.queue_declare(queue='队列名', durable=False)
# 定义一个回调函数来处理消息队列中的消息,这里是打印出来
def callback(ch, method, properties, body):
ch.basic_ack(delivery_tag=method.delivery_tag)
k = body.decode()
user_id = re.search('user_id=(.*?)&', k).group(1)
rds_16_8.hset('key名', user_id, k)
# 告诉rabbitmq,用callback来接收消息
channel.basic_consume('消费队列名', callback)
# 开始接收信息,并进入阻塞状态,队列里有信息才会调用callback进行处理
channel.start_consuming()
5.Celery操作
5.1CeleryAPP--MQ创建
from celery import Celery
# 入es的app
ins_data_app = Celery(
'instagram',
include=[
'spider.instagram.spider.task',
]
)
ins_data_app.config_from_object(DataEsConfig)
class DataEsConfig(object):
broker_url = 'amqp://admin:avGX9IHGeUVVcs5B@118.190.112.223:5672/'
task_ignore_result = True
task_serializer = 'json'
accept_content = ['json']
task_default_queue = 'default'
task_default_exchange = 'default'
task_default_routing_key = 'default'
exchange = Exchange('instagram', type='topic')
task_queues = [
Queue(
'instagram.attention.user.producer',
[binding(exchange, routing_key='instagram.attention.user.producer')],
# queue_arguments={'x-queue-mode': 'lazy'}
),
Queue(
'instagram.attention.data.producer',
[binding(exchange, routing_key='instagram.attention.data.producer')],
# queue_arguments={'x-queue-mode': 'lazy'}
),
]
task_routes = (data_es_route_task,)
enable_utc = True
timezone = "Asia/Shanghai"
from kombu import Exchange, Queue, binding
def data_es_route_task(name, args, kwargs, options, task=None, **kw):
return {
'exchange': 'instagram',
'exchange_type': 'topic',
'routing_key': name
}
class DataEsConfig(object):
broker_url = '' # 地址
task_ignore_result = True
task_serializer = 'json'
accept_content = ['json']
task_default_queue = 'default'
task_default_exchange = 'default'
task_default_routing_key = 'default'
exchange = Exchange('instagram', type='topic')
task_queues = [
Queue(
'instagram.attention.user.producer',
[binding(exchange, routing_key='instagram.attention.user.producer')],
queue_arguments={'x-queue-mode': 'lazy'}
),
Queue(
'instagram.attention.data.producer',
[binding(exchange, routing_key='instagram.attention.data.producer')],
queue_arguments={'x-queue-mode': 'lazy'}
),
]
task_routes = (data_es_route_task,)
enable_utc = True
timezone = "Asia/Shanghai"
5.2Celery--APP--MQ使用--消费
# 1.比如从队列里拿出user_id消费
# 2.消费完获取到数据,发送给其他的队列
# 3.入库程序取出数据,入库
@ins_app.task(
name="instagram.data.user.consumer", # 队列名称
bind=True,
default_retry_delay=5,
max_retries=3,
)
def start_articles(self, *args, **kwargs):
id = args[0]
start_time = time.time()
articles.go_article(id) # 方法
end_time = time.time()
print('总体响应时间:',end_time-start_time)
print('=='*50)
5.3Celery--APP--MQ使用--生产
# 发送作者关注数据一页
def send_attention(response):
dics = filter.attention_filter(response)
ins_data_app.send_task(
'instagram.attention.user.producer',
kwargs={'data': dics, 'post_time': int(time.time())}, retries=1
)
6.ES操作
6.1es模型类
es_zoo_connection = Elasticsearch('http://用户名:密码@地址:端口', dead_timeout=10, retry_on_timeout=True)
logger = logging.getLogger(__name__)
class ES(object):
index = None
doc_type = None
id_field = '_id'
version = ''
source_id_field = ''
aliase_field = ''
separator = '-'
aliase_func = None
es = None
tz = pytz.timezone('Asia/Shanghai')
logger = logger
@classmethod
def mget(cls, ids=None, index=None, **kwargs):
index = index or cls.index
docs = cls.es.mget(body={'ids': ids}, doc_type=cls.doc_type, index=index, **kwargs)
return docs
@classmethod
def count(cls, query=None, index=None, **kwargs):
index = index or cls.index
c = cls.es.count(doc_type=cls.doc_type, body=query, index=index, **kwargs)
return c.get('count', 0)
@classmethod
def upsert(cls, doc, doc_id=None, index=None, doc_as_upsert=True, **kwargs):
body = {
"doc": doc,
}
if doc_as_upsert:
body['doc_as_upsert'] = True
id = doc_id or cls.id_name(doc)
index = index or cls.index_name(doc)
cls.es.update(index, id, cls.doc_type, body, **kwargs)
@classmethod
def search(cls, index=None, query=None, **kwargs):
index = index or cls.index
return cls.es.search(index=index, body=query, **kwargs)
@classmethod
def scan(cls, query, index=None, **kwargs):
return scan(cls.es,
query=query,
index=index or cls.index,
**kwargs)
@classmethod
def index_name(cls, doc):
if cls.aliase_field and cls.aliase_field in doc.keys():
aliase_part = doc[cls.aliase_field]
if isinstance(aliase_part, str):
aliase_part = arrow.get(aliase_part)
if isinstance(aliase_part, int):
aliase_part = arrow.get(aliase_part).astimezone(cls.tz)
if cls.version:
index = '{}{}{}{}{}'.format(cls.index, cls.separator, cls.version, cls.separator,
cls.aliase_func(aliase_part))
else:
index = '{}{}{}'.format(cls.index, cls.separator, cls.aliase_func(aliase_part))
else:
index = cls.index
return index
@classmethod
def id_name(cls, doc):
id = doc.get(cls.id_field) and doc.pop(cls.id_field) or doc.get(cls.source_id_field)
if not id:
print('========', doc)
assert id, 'doc _id must not be None'
return id
@classmethod
def bulk_upsert(cls, docs, **kwargs):
"""
批量操作文章, 仅支持 index 和 update
"""
op_type = kwargs.get('op_type') or 'update'
chunk_size = kwargs.get('chunk_size')
if op_type == 'update':
upsert = kwargs.get('upsert', True)
if upsert is None:
upsert = True
else:
upsert = False
actions = cls._gen_bulk_actions(docs, cls.index_name, cls.doc_type, cls.id_name, op_type, upsert=upsert)
result = streaming_bulk(cls.es, actions, chunk_size=chunk_size, raise_on_error=False, raise_on_exception=False,
max_retries=5, request_timeout=25)
return result
@classmethod
def _gen_bulk_actions(cls, docs, index_name, doc_type, id_name, op_type, upsert=True, **kwargs):
assert not upsert or (upsert and op_type == 'update'), 'upsert should use "update" as op_type'
for doc in docs:
# 支持 index_name 作为一个工厂函数
if callable(index_name):
index = index_name(doc)
else:
index = index_name
if op_type == 'index':
_source = doc
elif op_type == 'update' and not upsert:
_source = {'doc': doc}
elif op_type == 'update' and upsert:
_source = {'doc': doc, 'doc_as_upsert': True}
else:
continue
if callable(id_name):
id = id_name(doc)
else:
id = id_name
# 生成 Bulk 动作
action = {
"_op_type": op_type,
"_index": index,
"_type": doc_type,
"_id": id,
"_source": _source
}
yield action
class toutiaoEsUser(ES):
index = 'user_toutiao-v1_0'
doc_type = '_doc'
id_field = '_id'
source_id_field = 'user_id'
es = es_zoo_connection
def get_follower_num(user_id):
try:
a = toutiaoEsUser.scan(
query={
"query": {
"match": {
"user_id": f"{user_id}"
}
}}
)
data = a.__next__()
follower_num = data.get('_source').get('followers_count')
return follower_num
except:
return None
7.爬虫
7.1下载图片
def download_image(dirname, video_id, image_url):
"""
dirname:目录名字
video_id:保存图片到名字
image_url:图片下载地址
"""
video_id = str(video_id).strip()
BASE_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "image", dirname)
if not os.path.isdir(BASE_PATH):
os.mkdir(BASE_PATH)
image_path = os.path.join(BASE_PATH, video_id)+".jpg"
for i in range(10):
try:
response = requests.get(image_url, timeout=10)
if response.status_code != 200:
continue
data = response.iter_content()
with open(image_path, 'wb')as f:
for line in data:
f.write(line)
f.close()
break
except Exception as e:
print(e)
print(image_url)
continue
选择了IT,必定终身学习