package main
import (
"fmt"
_"flag"
_ "os"
_ "io/ioutil"
_"strings"
_ "path"
"log"
_ "baliance.com/gooxml/document"
"database/sql"
_ "github.com/go-sql-driver/mysql"
"time"
"github.com/yanyiwu/gosimhash"
)
func main(){
t1 := time.Now()
Mylog(doc)
if err != nil {
Mylog(err)
}
db, err := sql.Open("mysql", "root:123456@tcp(127.0.0.1:3306)/gzpg_crs_jsj?charset=utf8");
if err != nil {
fmt.Println(err);
}
sql :="select s1.paper_id,s2.title_cn,s2.abstract_cn,s2.keyword_cn,s2.title_en,s2.abstract_en,s2.keyword_en,s1.s_content from sf_content s1,sf_paper s2 where s1.paper_id=s2.paper_id limit 10"
rows, err := db.Query(sql)
if err != nil {
fmt.Println(err);
}
stmt, err := db.Prepare("INSERT sim_path SET paperid=?,simcode=?")
if err != nil {
fmt.Println(err);
}
var str string
var code string
//查询多个
for rows.Next() {
var paper_id int //论文id
var title_cn string //中文题目
var abstract_cn string //中文摘要
var keyword_cn string //中文关键词
var title_en string //英文题目
var abstract_en string //英文摘要
var keyword_en string //英文关键词
var s_content string//全文内容
err = rows.Scan(&paper_id, &title_cn,&abstract_cn,&keyword_cn,&title_en,&abstract_en,&keyword_en,&s_content)
str = fmt.Sprintf("%s\n 摘要:%s\n 关键词:%s\n %s\n Abstract:%s\n Keywords:%s\n %s\n",title_cn,abstract_cn,keyword_cn,title_en,abstract_en,keyword_en,s_content)
code=simhash(str)
res, err := stmt.Exec(paper_id, code)
if err != nil {
fmt.Println(err);
}
id, err := res.LastInsertId()
if err != nil {
fmt.Println(err);
}
fmt.Print("%s成功%s \n",id,paper_id);
}
db.Close()
elapsed := time.Since(t1)
log.Println("时间花费位:\n" , elapsed)
}
func simhash(str string) (string) {
hasher := gosimhash.New("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8")
defer hasher.Free()
fingerprint := hasher.MakeSimhash(str, 1)
var code string
var s string = "0000000000000000000000000000000000000000000000000000000000000000"
bs := []byte(s)
for i := 63; i >= 0; i-- {
if (fingerprint&1)==1 {
bs[i]='1'
} else {
bs[i]='0'
}
fingerprint >>=1
}
code =string(bs)
return code
}
func Mylog(v ...interface{}) {
f, err := os.OpenFile("20181105go.log", os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
if err != nil {
Mylog(err)
}
defer f.Close()
logger := log.New(f, TAG, log.Ldate|log.Ltime|log.Lmicroseconds)
logger.Println(v...)
}