使用terraform 进行gitlab 代码仓库批量迁移
gitlab 的代码是在文件目录中,这个对于批量迁移很简单,只需要copy 文件夹(但是对于不同gitlab server 可能需要重新设置目录权限)
几个问题
- 大批量仓库tf resource问题
直接使用默认的方式不是很好,需要配置的内容比较多。解决方法:可以直接查询数据库数据,再使用模版引擎生成tf 文件。可选的开发语言很多,但是使用nodejs
开发可能会比较方便
- 仓库代码拷贝问题
推荐通过rsync 等工具进行数据同步,因为git repo 中存在软链接,一般的scp 不能保留软链接等信息
- tf 模版编写
因为可能存在多层级group,所以需要查询原有gitlab pg 数据库进行分析处理,这个分析下sql 就可以了(通过自关联查询,多级处理)
- 用户权限处理
这个稍有复杂,tf gitlab provider 提供了用户权限操作,但是对于不同的gitlab server,系统层面只识别用户id,而这个id 是系统生成的
这个可以通过email 等能确认是唯一的进行关联处理,通过查询,在模版生成的时候指定用户id
一个参考实现
- 模版处理
通过nodejs,为了方便管理,对于仓库信息通过rest 接口暴露,使用dotenv 管理环境变量
yarn add node-fetch mustache dotenv
app.js: 模版生成代码
const fetch = require('node-fetch');
require("dotenv").config()
const Mustache = require("mustache");
const fs = require("fs")
// Generate the Terraform (.tf) text: fetch the repository/group data from the
// REST endpoint named by the `gitlab_api` environment variable, render it
// through template/temp.mu and print the result to stdout.

/**
 * Escape a value for use inside a double-quoted Terraform/HCL string.
 *
 * @param {*} value - Raw value taken from the view object.
 * @returns {string} The value with backslashes, double quotes and newlines escaped.
 */
function escapeHclString(value) {
  return String(value)
    .replace(/\\/g, "\\\\")
    .replace(/"/g, '\\"')
    .replace(/\r?\n/g, "\\n");
}

// Mustache HTML-escapes {{value}} by default (e.g. `&` becomes `&amp;`), which
// corrupts Terraform string literals. The output is HCL, not HTML, so escape
// for an HCL quoted string instead.
Mustache.escape = escapeHclString;

if (!process.env.gitlab_api) {
  // Fail with a clear message instead of an opaque fetch error.
  console.error("environment variable gitlab_api is not set");
  process.exit(1);
}

fetch(process.env.gitlab_api)
  .then((res) => {
    // node-fetch does not reject on HTTP error statuses; check explicitly.
    if (!res.ok) {
      throw new Error(`request failed: ${res.status} ${res.statusText}`);
    }
    return res.json();
  })
  .then((body) => {
    fs.readFile("template/temp.mu", { encoding: "utf8" }, (err, template) => {
      if (err) {
        // Report the real cause and signal failure to the calling shell.
        console.error("failed to read template/temp.mu:", err.message);
        process.exitCode = 1;
        return;
      }
      console.log(Mustache.render(template, body));
    });
  })
  .catch((err) => {
    // Network errors, non-2xx responses and invalid JSON all end up here.
    console.error("failed to fetch repository data:", err.message);
    process.exitCode = 1;
  });
模版:
{{! Terraform template; it is rendered with the REST payload as the Mustache view. }}
{{! Comment tags like this one produce no output in the generated file. }}
provider "gitlab" {
base_url = "http://gitlabserver/api/v4/"
token = "${var.gitlab_token}"
}
{{! One gitlab_group resource per entry of data.groups_parent. }}
{{#data.groups_parent}}
resource "gitlab_group" "{{name}}" {
name = "{{name}}"
path = "{{path}}"
description = "{{description}}"
}
{{/data.groups_parent}}
{{! One gitlab_group resource per entry of data.subgroups; parent_id points at the gitlab_group resource labelled by the entry's parent field. }}
{{! NOTE(review): the resource label is the group name in both sections, so two groups sharing a name would presumably collide; confirm names are unique. }}
{{#data.subgroups}}
resource "gitlab_group" "{{name}}" {
name = "{{name}}"
path = "{{path}}"
description = "{{description}}"
parent_id = "${gitlab_group.{{parent}}.id}"
}
{{/data.subgroups}}
{{! One gitlab_project resource per entry of data.projects; namespace_id points at the gitlab_group resource labelled by the entry's group field. Only name and namespace_id are emitted. }}
{{#data.projects}}
resource "gitlab_project" "{{name}}" {
name = "{{name}}"
namespace_id = "${gitlab_group.{{group}}.id}"
}
{{/data.projects}}
仓库代码restapi格式:
{
"data": {
"projects": [
{
"name": "firstrong",
"path":"firstrong",
"description":"firstrong",
"group" : "firstrong"
},
{
"name": "secondrong",
"path":"secondrong",
"description":"secondrong",
"group": "secondrong"
},
{
"name": "thirdrong",
"path":"thirdrong",
"description":"thirdrong",
"group": "thirdrong"
}
],
"groups_parent":[
{
"name":"firstrong",
"path":"firstrong",
"description":"firstrong"
},
{
"name":"secondrong",
"path":"secondrong",
"description":"secondrong"
},
{
"name":"thirdrong",
"path":"thirdrong",
"description":"thirdrong"
}
],
"subgroups":[
{
"name":"demoapp",
"path":"demoapp",
"parent":"firstrong"
},
{
"name":"demoapp2",
"path":"demoapp2",
"parent":"secondrong"
},
{
"name":"demoapp3",
"path":"demoapp3",
"parent":"thirdrong"
}
]
}
}
- 生成的tf 内容(参考)
provider "gitlab" {
base_url = "http://gitlabserver/api/v4/"
token = "${var.gitlab_token}"
}
resource "gitlab_group" "firstrong" {
name = "firstrong"
path = "firstrong"
description = "firstrong"
}
resource "gitlab_group" "secondrong" {
name = "secondrong"
path = "secondrong"
description = "secondrong"
}
resource "gitlab_group" "thirdrong" {
name = "thirdrong"
path = "thirdrong"
description = "thirdrong"
}
resource "gitlab_group" "demoapp" {
name = "demoapp"
path = "demoapp"
description = ""
parent_id = "${gitlab_group.firstrong.id}"
}
resource "gitlab_group" "demoapp2" {
name = "demoapp2"
path = "demoapp2"
description = ""
parent_id = "${gitlab_group.secondrong.id}"
}
resource "gitlab_group" "demoapp3" {
name = "demoapp3"
path = "demoapp3"
description = ""
parent_id = "${gitlab_group.thirdrong.id}"
}
resource "gitlab_project" "firstrong" {
name = "firstrong"
namespace_id = "${gitlab_group.firstrong.id}"
}
resource "gitlab_project" "secondrong" {
name = "secondrong"
namespace_id = "${gitlab_group.secondrong.id}"
}
resource "gitlab_project" "thirdrong" {
name = "thirdrong"
namespace_id = "${gitlab_group.thirdrong.id}"
}
- gitlab group层级查询(主要是判断使用子group 的情况)
这是一个简单的四级处理,实际情况再处理下
-- Walk four levels of the group hierarchy by self-joining namespaces:
-- a = parent, b = child of a, c = child of b, d = child of c.
select
    a.name as parent,
    b.name,
    b.path,
    b.description,
    c.name as thirdname,
    c.path as thirdpath,
    c.description as thirddescription,
    d.name as fourthname,
    d.path as fourthpath
from namespaces a
join namespaces b on a.id = b.parent_id
join namespaces c on b.id = c.parent_id
join namespaces d on c.id = d.parent_id
-- already implied by the inner joins above; kept to make the intent explicit
where b.parent_id is not null
  and c.parent_id is not null;
rest api 生成方式
上边有一个简单的rest 内容格式,实际的生成我们可以直接使用sqler(简单方便,写几个简单的sql 就可以搞定了)
- sqler docker 运行配置
# docker-compose definition that runs sqler to expose SQL queries as a REST API.
version: "3"
services:
  sqler:
    image: dalongrong/sqler:2.0
    volumes:
      # sqler macro configuration mounted into the container
      - "./config/config-example.hcl:/app/config.example.hcl"
    environment:
      # placeholder PostgreSQL DSN; replace with the real connection values
      - "DSN=postgresql://username:password@server:port/dbname"
    ports:
      # presumably sqler's default RESP (3678) and REST (8025) listeners;
      # confirm against the sqler version in use
      - "3678:3678"
      - "8025:8025"
- 配置文件(参考,很简单,写了几个,按照实际补充下,这个可能实际运行有问题)
# sqler macros that feed the REST payload used by the template.
# NOTE(review): `group` and `group_project` run the same query against
# `projects`; presumably `group` should read from `namespaces` - confirm.
group {
    exec = <<SQL
SELECT DISTINCT name FROM projects WHERE namespace_id IS NOT NULL
SQL
}
group_project {
    exec = <<SQL
SELECT DISTINCT name FROM projects WHERE namespace_id IS NOT NULL
SQL
}
# Aggregates the two macros above into a single response.
gitlab_project {
    aggregate = ["group", "group_project"]
}
说明
以上只是一个简单的说明,以及简单实践,实际迁移可能没有这么简单,会有各类的问题,还是需要部分人员的介入处理,因为毕竟每个
团队对于git 的使用情况是不一样的,需要实际问题实际考虑,变通下。实际上对于git 的同步也可以使用tf 操作,使用remote-exec
provisioner