Arch Linux: Installation and Usage
Installing Arch Linux
Windows Setup
Disable Secure Boot
- Reboot into the firmware settings and disable Secure Boot (pressing ESC during boot usually brings up the menu)
Disable hibernation and Fast Startup
# run cmd as administrator
powercfg -h off
Hardware clock
# make Windows treat the BIOS/RTC time as UTC
reg add "HKEY_LOCAL_MACHINE\System\CurrentControlSet\Control\TimeZoneInformation" /v RealTimeIsUniversal /d 1 /t REG_DWORD /f
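To confirm the key took effect, you can query it back (optional check):
# should report RealTimeIsUniversal REG_DWORD 0x1
reg query "HKEY_LOCAL_MACHINE\System\CurrentControlSet\Control\TimeZoneInformation" /v RealTimeIsUniversal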
Installing the System
Change the tty font
# recommended at 1920x1080; see /usr/share/kbd/consolefonts for alternatives
setfont ter-132b
Verify the boot mode
cat /sys/firmware/efi/fw_platform_size
# 64 = 64-bit UEFI, 32 = 32-bit UEFI; if the file does not exist, the system booted in BIOS mode
Connect to the Internet
# stop the reflector service
systemctl stop reflector
# start auditd (optional; helps when the console keeps getting flooded with audit messages)
systemctl start auditd
# check network status; the wireless device is usually wlan0
ip link
# connect to a wireless network
iwctl
station wlan0 connect CMCC-AH6G # CMCC-AH6G is the Wi-Fi SSID
quit
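Before moving on, it is worth confirming the link actually works (station wlan0 show inside iwctl also reports the connection state):
ping -c 3 archlinux.org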
Update the system clock
# time syncs automatically once the network is up
timedatectl # verify the UTC time is correct
Partitioning and Mounting
disk | mount point | size |
---|---|---|
/dev/nvme0n1p2 | /efi (officially /boot or /efi) | 100M |
/dev/nvme0n1p5 | / | 40G |
/dev/nvme0n1p6 | [swap] | 8G |
/dev/sda2 | /opt | 100G |
/dev/sda3 | /home | 331.5G |
/dev/sda1 | /data | 500G |
lsblk # check the disk layout
# partition
cfdisk /dev/sda
cfdisk /dev/nvme0n1
# format (on a dual-boot system, do NOT format the existing EFI partition)
mkfs.ext4 /dev/nvme0n1p5
mkfs.ext4 /dev/sda2
mkfs.ext4 /dev/sda3
# swap space
mkswap /dev/nvme0n1p6
# mount (root partition first)
mount --mkdir /dev/nvme0n1p5 /mnt
mount --mkdir /dev/nvme0n1p2 /mnt/efi
mount --mkdir /dev/sda2 /mnt/opt
mount --mkdir /dev/sda3 /mnt/home
mount --mkdir /dev/sda1 /mnt/data # optional
# enable swap
swapon /dev/nvme0n1p6
lsblk # double-check the final layout
Choose a Mirror
# move the ustc mirror to the top of the list
vim /etc/pacman.d/mirrorlist
/China then Enter # search for China
35dd # cut 35 lines
gg # go to the top
p # paste
/ustc then Enter # search for ustc
n # next match
dd # cut 1 line
gg # go to the top
p # paste
:wq # save and quit
pacman -Syy
Install Base Packages
# install the essential packages; pick intel-ucode or amd-ucode to match your CPU
pacstrap -K /mnt base linux linux-firmware base-devel linux-headers intel-ucode
# LTS kernel variant
pacstrap -K /mnt base linux-lts linux-firmware base-devel linux-lts-headers intel-ucode
Generate fstab
# generate fstab
genfstab -U /mnt >> /mnt/etc/fstab
cat /mnt/etc/fstab
Configure the System
Enter the new system
# chroot into the new system
arch-chroot /mnt
Install supplementary packages
# vim
pacman -S vim
# man pages
pacman -S man-db man-pages texinfo
# filesystem support (recommended)
pacman -S dosfstools ntfs-3g
Time zone and hardware clock
# set the time zone
ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
# write the system time to the hardware clock as UTC
hwclock --systohc
Locale
# enable locales (English + Chinese)
vim /etc/locale.gen
en_US.UTF-8 UTF-8
zh_CN.UTF-8 UTF-8
locale-gen
# set the system locale
echo 'LANG=en_US.UTF-8' > /etc/locale.conf
Users
# set the root password
passwd
# create the user ruoli
useradd -m -G wheel ruoli
passwd ruoli
# grant passwordless sudo to the wheel group
EDITOR=vim visudo
%wheel ALL=(ALL:ALL) NOPASSWD: ALL
Network
# create the hostname file
echo 'ruoli-arch' > /etc/hostname
# hostname resolution
vim /etc/hosts
127.0.0.1 localhost
::1 localhost
127.0.1.1 ruoli-arch.localdomain ruoli-arch
# network manager
pacman -S networkmanager
systemctl enable NetworkManager
Boot Loader
# os-prober detects other installed operating systems
pacman -S grub efibootmgr os-prober
# install the EFI binary grubx64.efi
grub-install --target=x86_64-efi --efi-directory=/efi --bootloader-id=ArchLinux
# generate grub.cfg
grub-mkconfig -o /boot/grub/grub.cfg
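To double-check the result, efibootmgr lists the firmware boot entries:
# look for an "ArchLinux" entry in the output
efibootmgr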
About initramfs
mkinitcpio -P # rebuild the initramfs for all installed kernels; usually unnecessary here, pacstrap already did it
# or per kernel preset
mkinitcpio -p linux
mkinitcpio -p linux-lts
Reboot
# exit the chroot
exit
umount -R /mnt
swapoff /dev/nvme0n1p6
reboot # remove the install media, then verify that both ruoli and root can log in
Post-Installation
Set the tty font
setfont LatGrkCyr-12x22
Persist the tty configuration
vim /etc/vconsole.conf
KEYMAP=us
FONT=LatGrkCyr-12x22
Patch grub.cfg
vim /etc/default/grub
GRUB_DISABLE_OS_PROBER=false
grub-mkconfig -o /boot/grub/grub.cfg
Connect to the Internet
# connect (terminal UI)
nmtui
Edit hosts
ip addr # find the IP address
inet 192.168.10.3/24 brd 192.168.10.255 scope global dynamic wlo1
vim /etc/hosts
192.168.10.3 ruoli-arch
Components and Services
Enable the 32-bit repository
# edit the pacman configuration
vim /etc/pacman.conf
Color
[multilib]
Include = /etc/pacman.d/mirrorlist
pacman -Syy
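After the refresh you can sanity-check that the repository is active:
# lists packages from [multilib]; non-empty output means it is enabled
pacman -Sl multilib | head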
Install tools and services
# fonts
# pacman -S noto-fonts-cjk
pacman -S adobe-source-han-sans-cn-fonts adobe-source-han-serif-cn-fonts
# common tools
pacman -S openssh git wget curl tree
# SSH service
systemctl enable sshd
Install GPU Drivers
See the wiki pages: Intel, Nvidia, NVIDIA_Optimus
Model lookup: Intel graphics support, Nvidia GPU models
# identify the CPU and GPU
lscpu | grep -E CPU
lspci | grep -E VGA
# this machine
- CPU: 8th-gen i5
Intel(R) Core(TM) i5-8300H CPU @ 2.30GHz
- GPU
Intel Corporation CoffeeLake-H GT2 [UHD Graphics 630]
NVIDIA Corporation GP107M [GeForce GTX 1050 Ti Mobile] (rev a1)
# install
# Intel integrated graphics
pacman -S mesa lib32-mesa
pacman -S vulkan-intel lib32-vulkan-intel
pacman -S intel-compute-runtime
# NVIDIA discrete graphics (install nvidia-lts on the LTS kernel)
pacman -S nvidia nvidia-utils lib32-nvidia-utils
pacman -S opencl-nvidia lib32-opencl-nvidia
# hybrid graphics
pacman -S nvidia-prime
Driver configuration
# Intel integrated graphics
vim /etc/mkinitcpio.conf
MODULES=(... i915 ...)
mkinitcpio -P
# NVIDIA discrete graphics
vim /etc/default/grub
GRUB_CMDLINE_LINUX_DEFAULT="... nvidia_drm.modeset=1"
grub-mkconfig -o /boot/grub/grub.cfg
vim /etc/mkinitcpio.conf
HOOKS=(...) # remove kms from the HOOKS array
MODULES=(... nvidia nvidia_modeset nvidia_uvm nvidia_drm ...)
mkinitcpio -P
Desktop Environment (pick one)
# display server + display manager + desktop environment
# KDE (what I use; recommended)
pacman -S plasma-meta
pacman -S konsole dolphin # terminal + file manager
Note: codec `ffmpeg`, sound server `pipewire-jack`, fonts `noto-fonts`
systemctl enable sddm
reboot
# Cinnamon desktop
# lightdm + cinnamon
pacman -S xorg-server wayland xorg-xwayland
pacman -S lightdm lightdm-gtk-greeter
pacman -S cinnamon gnome-terminal
systemctl enable lightdm
reboot
Basic Packages (optional)
Note: KDE-oriented picks (use with care on other desktop environments); Chinese language packs are the matching -i18n-zh-cn packages
sudo pacman -S firefox # Firefox browser*
sudo pacman -S code # text editor*
sudo pacman -S okular # document viewer*
sudo pacman -S gwenview # image viewer*
sudo pacman -S elisa # music player*
sudo pacman -S vlc # media player*
sudo pacman -S ark # archive manager*
sudo pacman -S unzip unrar # archive format support*
sudo pacman -S ksystemlog # system log viewer*
sudo pacman -S partitionmanager # partition manager*
sudo pacman -S kdeconnect # device pairing*
sudo pacman -S timeshift # system backup*
sudo pacman -S gimp # image editor*
sudo pacman -S kdenlive # video editor*
sudo pacman -S kcalc # calculator*
sudo pacman -S spectacle # screenshots*
sudo pacman -S htop # process viewer*
sudo pacman -S yakuake # drop-down terminal (F12)*
sudo pacman -S flameshot # Flameshot screenshots*
sudo pacman -S neofetch # system info (see also hyfetch)
Arch Linux Tweaks
Fixing /data partition permissions (legacy)
- Partition adjustments (delete, grow/shrink, move) are best done from the Arch ISO environment
sudo cp -r /etc/fstab /etc/fstab.bak
sudo vim /etc/fstab
# /dev/sda1 LABEL=Data
UUID=D800F1EF00F1D48A /data ntfs rw,nosuid,nodev,uid=1000,gid=1000,dmask=022,fmask=133,blksize=4096 0 0
reboot
id ruoli # check the uid and gid
uid=1000(ruoli) gid=1000(ruoli) groups=1000(ruoli),998(wheel)
# directory permissions rwxr-xr-x = 755: dmask = 777-755 = 022
# file permissions rw-r--r-- = 644: fmask = 777-644 = 133
Package Management
Configure pacman
# colored output, verbose package lists, parallel downloads
sudo vim /etc/pacman.conf
Color
VerbosePkgLists
ParallelDownloads = 5
Add pacman repositories
sudo vim /etc/pacman.conf
[archlinuxcn]
Server = https://mirrors.tuna.tsinghua.edu.cn/archlinuxcn/$arch
sudo pacman-key --lsign-key "farseerfc@archlinux.org"
sudo pacman -Sy archlinuxcn-keyring
[arch4edu]
Server = https://mirrors.tuna.tsinghua.edu.cn/arch4edu/$arch
# import the GPG key
sudo pacman-key --recv-keys 7931B6D628C8D3BA
sudo pacman-key --finger 7931B6D628C8D3BA
sudo pacman-key --lsign-key 7931B6D628C8D3BA
sudo pacman -Syyu
AUR helper
sudo pacman -S paru
sudo vim /etc/paru.conf
BottomUp
GitHub Speedup (optional)
Reference: https://gitee.com/if-the-wind/github-hosts
sudo vim /etc/hosts
# faster access to GitHub
185.199.109.154 github.githubassets.com
140.82.112.22 central.github.com
185.199.108.133 desktop.githubusercontent.com
185.199.108.153 assets-cdn.github.com
185.199.108.133 camo.githubusercontent.com
185.199.108.133 github.map.fastly.net
146.75.37.194 github.global.ssl.fastly.net
140.82.112.3 gist.github.com
185.199.109.153 github.io
140.82.112.3 github.com
140.82.114.6 api.github.com
185.199.111.133 raw.githubusercontent.com
185.199.108.133 user-images.githubusercontent.com
185.199.108.133 favicons.githubusercontent.com
185.199.109.133 avatars5.githubusercontent.com
185.199.108.133 avatars4.githubusercontent.com
185.199.108.133 avatars3.githubusercontent.com
185.199.111.133 avatars2.githubusercontent.com
185.199.111.133 avatars1.githubusercontent.com
185.199.108.133 avatars0.githubusercontent.com
185.199.108.133 avatars.githubusercontent.com
140.82.114.10 codeload.github.com
3.5.22.156 github-cloud.s3.amazonaws.com
52.217.91.140 github-com.s3.amazonaws.com
52.217.90.252 github-production-release-asset-2e65be.s3.amazonaws.com
52.217.117.201 github-production-user-asset-6210df.s3.amazonaws.com
3.5.27.169 github-production-repository-file-5c1aeb.s3.amazonaws.com
185.199.108.153 githubstatus.com
140.82.113.17 github.community
185.199.108.133 media.githubusercontent.com
Grub Tweaks
Add reboot/shutdown entries
sudo vim /boot/grub/custom.cfg
menuentry "System shutdown" {
echo "System shutting down..."
halt
}
menuentry "System restart" {
echo "System rebooting..."
reboot
}
Grub theme
Recommended theme: Grub-theme-vimix; GitHub: grub2-themes
cd ~/Documents/tools/grub
tar -xvf Vimix-1080p.tar.xz
sudo mv Vimix-1080p/Vimix /boot/grub/themes/
rm -rf Vimix-1080p
# set the theme
sudo vim /etc/default/grub
GRUB_THEME="/boot/grub/themes/Vimix/theme.txt"
sudo grub-mkconfig -o /boot/grub/grub.cfg
Grub UI (optional)
sudo cp /etc/default/grub /etc/default/grub.bak
sudo cp /boot/grub/grub.cfg /boot/grub/grub.cfg.bak
# configure Grub from a GUI
sudo pacman -S grub-customizer
Chinese Localization
Locale
# system-wide English
sudo vim /etc/locale.conf
LANG=en_US.UTF-8
# Chinese for the graphical session only
# environment variables; note: in KDE just set Region & Language to Chinese
export LANG=zh_CN.UTF-8
export LANGUAGE=zh_CN:en_US
Chinese fonts (as needed)
sudo pacman -S adobe-source-han-sans-cn-fonts adobe-source-han-serif-cn-fonts # fixes copy/paste stutter; these alone are usually enough
sudo pacman -S noto-fonts-cjk
sudo pacman -S wqy-microhei
# rebuild the font cache
fc-cache -fv
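To verify the fonts are visible to fontconfig (fc-list ships with fontconfig):
# list installed fonts that cover Chinese
fc-list :lang=zh | head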
Chinese Input Method
An input method = framework + engine + dictionaries
fcitx5 framework
# base framework
sudo pacman -S fcitx5-im
- Keyboard > Virtual Keyboard: select Fcitx5 (KDE Wayland)
- On Wayland, configuring GTK_IM_MODULE=fcitx and QT_IM_MODULE=fcitx separately is recommended
sudo vim /etc/environment
XMODIFIERS=@im=fcitx
# GTK_IM_MODULE=fcitx
#QT_IM_MODULE=fcitx
# fcitx5 theme (recommended)
sudo pacman -S fcitx5-breeze
- Input Method > Configure addons - Classic UI: set Theme/Dark theme to Breeze Dark (blue)
# fcitx5 diagnostics (run this when the input method misbehaves)
fcitx5-diagnose
Pinyin
# Pinyin input method: engine + dictionary
sudo pacman -S fcitx5-chinese-addons fcitx5-pinyin-zhwiki
- Input Method > Add input method: Pinyin
- Configure Pinyin
Enable cloud pinyin and set an available backend
Add [ and ] as page-turning keys
Rime
Rime needs an input schema (some come built in; custom ones also work). I use the rime-ice (雾凇拼音) schema.
Install and configure
# rime engine
sudo pacman -S fcitx5-rime
1. System Settings > Input Method (it prompts to update the engine) - Add input method: Rime (中州韵)
2. Switch the active input method to Rime
(it initializes itself on first use)
# Rime workspace
tree -L 1 ~/.local/share/fcitx5/rime
# rime-ice schema, installed under /usr/share/rime-data/
sudo pacman -S rime-ice-git
cd ~/.local/share/fcitx5/rime
# 1. global config (pulls in the rime_ice config; general settings: candidates, shortcuts, etc.)
vim default.custom.yaml
sudo vim /usr/share/rime-data/rime_ice_suggestion.yaml
# 2. schema-specific config (optional: theme, fonts, etc.)
touch rime_ice.custom.yaml
# 3. redeploy after any change
- right-click the input method icon: Rime - Redeploy
# inspect the generated files
more build/default.yaml
more build/rime_ice.schema.yaml
default.custom.yaml
patch:
  # include the rime-ice config: /usr/share/rime-data/rime_ice_suggestion.yaml
  __include: rime_ice_suggestion:/
  # custom overrides
  __patch:
    # number of candidates
    menu/page_size: 7
    # key binding format: "mode - accepted key - triggered action"
    key_binder/bindings/+:
      # page with comma/period
      - { when: paging, accept: comma, send: Page_Up }
      - { when: has_menu, accept: period, send: Page_Down }
rime_ice_suggestion.yaml
# schema list
schema_list:
  # - schema: double_pinyin_sogou # Sogou double pinyin
  # - schema: double_pinyin_flypy # Flypy double pinyin
  # - schema: double_pinyin_ziguang # Ziguang double pinyin
# shortcuts
key_binder:
  # Lua option: select-by-word (commit the first or last character of the current phrase); conflicts with bracket paging
  # select_first_character: "bracketleft" # left bracket [
  # select_last_character: "bracketright" # right bracket ]
  bindings:
    # page with [ ]
    - { when: paging, accept: bracketleft, send: Page_Up }
    - { when: has_menu, accept: bracketright, send: Page_Down }
Installing Windows Fonts (optional)
# install from the local Windows partition: C:/Windows/Fonts (mount it first)
mkdir -p ~/.local/share/fonts
cd /run/media/ruoli/F4EAAAB0EAAA6E94/Windows/Fonts
cp -avf ../Fonts ~/.local/share/fonts/WindowsFonts
# rebuild the font cache
fc-cache -vf
Shell Tweaks
Aliases
alias pacman='sudo pacman'
alias ll='ls -lh'
alias lla='ls -alh'
alias vim='sudo env HOME=$HOME vim'
alias nvim='sudo env HOME=$HOME nvim'
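These aliases are not persistent on their own; append them to your shell rc file (zsh here, matching the next section) and reload:
echo "alias ll='ls -lh'" >> ~/.zshrc
source ~/.zshrc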
Zsh & Oh My Zsh
Oh My Zsh (see the official site) is a Zsh framework that makes configuring Zsh easy.
# Zsh
sudo pacman -S zsh
# Oh My Zsh
cd ~/Documents/github
git clone https://github.com/ohmyzsh/ohmyzsh.git
# install script
~/Documents/github/ohmyzsh/tools/install.sh
# tools: uninstall, theme chooser, etc.
~/.oh-my-zsh/tools/theme_chooser.sh
sh ~/.oh-my-zsh/tools/uninstall.sh
Themes and plugins
# plugins: autosuggestions + syntax highlighting
cd ~/Documents/github
git clone https://github.com/zsh-users/zsh-autosuggestions.git
git clone https://github.com/zsh-users/zsh-syntax-highlighting.git
cp -r ~/Documents/github/zsh-* ~/.oh-my-zsh/custom/plugins
# edit the config
vim ~/.zshrc
ZSH_THEME="frisk"
plugins=(
  git
  zsh-autosuggestions
  zsh-syntax-highlighting
)
# comment out the original plugins line; keep the highlighting plugin last
source ~/.zshrc
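If the Oh My Zsh installer did not already switch your login shell, change it manually and log in again:
chsh -s /usr/bin/zsh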
Vim Tweaks
Vim
vim ~/.vimrc # optional; Arch ships a default Vim config
set nocompatible " drop vi compatibility
filetype on " detect file types
filetype plugin on " load plugins per file type
filetype indent on " indent per file type
syntax on " syntax highlighting
set mouse=a " mouse support
set number " show line numbers
set relativenumber " show relative line numbers
set cursorline " highlight the current line
"set cursorcolumn " highlight the current column
set shiftwidth=2 " shift width of 2
set tabstop=2 " tab width of 2
set expandtab " expand tabs to spaces
set nobackup " no backup files
set scrolloff=10 " keep 10 lines visible above/below the cursor
set nowrap " no line wrapping
set incsearch " incremental search
set ignorecase " case-insensitive search
set smartcase " unless the pattern contains uppercase
set showcmd " show the pending command prefix
set showmode " show the current mode (insert, visual, ...)
set showmatch " highlight matching words while searching
set hlsearch " highlight search results
set history=1000 " keep 1000 commands of history
set wildmenu " tab-completion menu
set wildmode=list:longest " Bash-like tab completion
set encoding=utf-8 " UTF-8 encoding
Neovim
sudo pacman -S neovim
# Neovim workspace (configure here, or use an existing distribution such as LazyVim)
mkdir -p ~/.config/nvim
touch ~/.config/nvim/init.lua
LazyVim
Official site: LazyVim
git clone https://github.com/LazyVim/starter.git ~/.config/nvim
nvim # initializes automatically on first launch (needs a decent network connection); updates can also be run here
l # Lazy menu
Shift + u # update
# install a Nerd Font and set it as the terminal font (Hack also ships icons) (recommended)
sudo pacman -S ttf-jetbrains-mono-nerd
# components (recommended)
sudo pacman -S xclip # clipboard
sudo pacman -S lazygit
sudo pacman -S ripgrep
sudo pacman -S fd
# reboot
reboot
Hardware
GPU Drivers
Intel
Arch wiki: Intel_graphics; model lookup: Intel graphics support
Intel(R) Core(TM) i5-8300H CPU @ 2.30GHz
Intel Corporation CoffeeLake-H GT2 [UHD Graphics 630]
# Intel
# on KDE, xf86-video-intel is not recommended; fall back to the modesetting driver instead
sudo pacman -S mesa lib32-mesa
sudo pacman -S vulkan-intel lib32-vulkan-intel
sudo pacman -S intel-compute-runtime
# sudo pacman -S intel-media-driver # hardware video decoding from 9th gen onward
# Intel configuration
sudo vim /etc/mkinitcpio.conf
MODULES=(... i915 ...) # add i915
sudo mkinitcpio -P
NVIDIA
Arch wiki: NVIDIA; model lookup: Nvidia GPU models
NVIDIA Corporation GP107M [GeForce GTX 1050 Ti Mobile] (rev a1)
# NVIDIA proprietary driver; install nvidia-lts on the LTS kernel
sudo pacman -S nvidia nvidia-utils lib32-nvidia-utils
sudo pacman -S opencl-nvidia lib32-opencl-nvidia
# NVIDIA configuration
# 1. keep the nouveau module from loading
sudo vim /etc/mkinitcpio.conf
HOOKS=(...) # remove kms from the HOOKS array
sudo mkinitcpio -P
# 2. enable DRM KMS
sudo vim /etc/default/grub
GRUB_CMDLINE_LINUX_DEFAULT="... nvidia_drm.modeset=1"
sudo grub-mkconfig -o /boot/grub/grub.cfg
# 3. early KMS start (recommended; plain DRM KMS covers only the basics)
sudo vim /etc/mkinitcpio.conf
MODULES=(... nvidia nvidia_modeset nvidia_uvm nvidia_drm ...)
sudo mkinitcpio -P
# 4. pacman hook (so an nvidia driver update never leaves a stale initramfs)
sudo mkdir -p /etc/pacman.d/hooks
sudo vim /etc/pacman.d/hooks/nvidia.hook
nvidia.hook
[Trigger]
Operation=Install
Operation=Upgrade
Operation=Remove
Type=Package
Target=nvidia-lts
Target=linux-lts
# change the nvidia/linux targets above if you run a different kernel
[Action]
Description=Update Nvidia module in initcpio
Depends=mkinitcpio
When=PostTransaction
NeedsTargets
Exec=/bin/sh -c 'while read -r trg; do case $trg in linux*) exit 0; esac; done; /usr/bin/mkinitcpio -P'
Hybrid Graphics
For laptops with both Intel and NVIDIA GPUs, see the NVIDIA Optimus page
# PRIME: provides the prime-run wrapper
sudo pacman -S nvidia-prime
# the system defaults to the integrated GPU
glxinfo | grep "OpenGL renderer"
# test the NVIDIA GPU
prime-run glxinfo | grep "OpenGL renderer"
# run everyday programs on the NVIDIA GPU (& runs them in the background)
prime-run firefox &
prime-run google-chrome-stable &
prime-run steam &
# show GPU status
nvidia-smi
# continuous monitoring
watch -n 1 nvidia-smi
Firmware Updates (as needed)
sudo pacman -S fwupd
# list every device fwupd detects; some (e.g. Intel integrated graphics) cannot be updated with this tool and need a vendor solution instead
fwupdmgr get-devices
# fetch the latest metadata from the Linux Vendor Firmware Service (LVFS)
fwupdmgr refresh
# list available updates
fwupdmgr get-updates
# install updates
sudo fwupdmgr update
Devices with no available firmware updates:
• HGST HTS721010A9E630
• Internal SPI Controller
• MZVLW128HEGR-000H1
• System Firmware
Devices with the latest firmware already:
• UEFI dbx
System Services
File indexing (optional)
# most distributions ship the locate command for fast file search
# on Arch, install mlocate and run updatedb to build the filesystem index
sudo pacman -S mlocate
sudo updatedb
# index: /var/lib/mlocate/mlocate.db
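Typical usage once the index exists (re-run updatedb to pick up new files):
locate pacman.conf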
Printing
# CUPS no longer recommends printer drivers; prefer IPP Everywhere (cups-pdf adds print-to-PDF)
sudo pacman -S cups cups-pdf
# socket activation: CUPS starts only when a program actually needs it
sudo systemctl disable --now cups
sudo systemctl enable --now cups.socket
Local mail (optional)
# sudo pacman -S kmail
Firewall
sudo pacman -S firewalld
# enable when needed
sudo systemctl enable --now firewalld
Bluetooth
sudo pacman -S bluez #bluez-utils
# enable and start the service
sudo systemctl enable --now bluetooth
Sound (optional)
# PipeWire can directly replace other audio servers, e.g. PulseAudio and JACK
sudo pacman -S pipewire
sudo pacman -S pipewire-pulse
sudo pacman -S pipewire-jack
System Maintenance (ongoing)
Cleaning /var
# 1. caches; safe to clean
sudo du -sh /var/cache/*
4.2G /var/cache/pacman # worth cleaning
sudo rm -rf /var/cache/pacman/pkg/*
# sudo cp -avf /var/cache /home/cache
# sudo rm -rf /var/cache
# sudo ln -s /home/cache /var/cache
# 2. application data; mostly must stay
sudo du -sh /var/lib/*
112M /var/lib/flatpak
221M /var/lib/mysql
177M /var/lib/systemd
# 3. logs; cleanable, but carefully (login records etc. live here)
sudo du -sh /var/log/*
530M /var/log/journal
# delete files not modified for over a day
find /var/log/journal/ -type f -mtime +1 -exec sudo rm -rf {} \;
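A safer alternative for the journal specifically is journald's own vacuum options, which keep the journal index consistent:
# cap by age or by total size
sudo journalctl --vacuum-time=7d
sudo journalctl --vacuum-size=100M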
Removing unneeded packages
pacman -Qtdq | sudo pacman -Rns -
error: argument '-' specified with empty stdin # means there are no orphaned packages
Removing old config files
ls ~/.config/ # where applications keep their configs
ls ~/.cache/ # program caches
ls ~/.local/share/ # may hold stale files
ls ~/.local/state/ # may hold stale files
Broken symlinks
# list broken symlinks for review; check before deleting (never blindly)
sudo find / -xtype l -print > ~/Downloads/link.log
# lines printed to the terminal rather than link.log are find's own errors (not necessarily deletable targets)
find: '/run/user/1000/doc': Permission denied
find: '/proc/25922/task/25922/fd/6': No such file or directory
find: '/proc/25922/task/25922/fdinfo/6': No such file or directory
find: '/proc/25922/fd/5': No such file or directory
find: '/proc/25922/fdinfo/5': No such file or directory
Bug Fixes (as needed)
Failed to load module "appmenu-gtk-module"
sudo pacman -S appmenu-gtk-module
hostname command not found
sudo pacman -S inetutils
Discover xxx packages support
sudo pacman -S packagekit-qt6
egrep: warning: egrep is obsolescent; using grep -E
sudo vim /usr/bin/egrep # comment out the warning line:
# echo "$cmd: warning: $cmd is obsolescent; using grep -E" >&2
WARNING: Possibly missing firmware for module: 'xhci_pci'
paru -S upd72020x-fw
WARNING: Possibly missing firmware for module: 'wd719x'
WARNING: Possibly missing firmware for module: 'aic94xx'
sudo pacman -Sy wd719x-firmware aic94xx-firmware
WARNING: Possibly missing firmware for module: 'qed'
WARNING: Possibly missing firmware for module: 'bfa'
WARNING: Possibly missing firmware for module: 'qla2xxx'
WARNING: Possibly missing firmware for module: 'qla1280'
sudo pacman -S linux-firmware-qlogic
Transparent Proxy (incomplete)
V2rayA
sudo pacman -S v2raya
KDE Tweaks
Desktop frozen
# open a terminal (Ctrl+Alt+T or F12) or a TTY (Ctrl+Alt+F1~F6)
kquitapp6 plasmashell && kstart plasmashell
System Settings
Mouse and touchpad
1. Pointer speed: 0.4
2. Disable the touchpad while typing (constant accidental touches on a laptop)
3. Touchpad pointer speed: 0.4
Colors and themes
1. Global theme: Breeze (light recommended)
2. Login screen: Breeze
Wallpaper
1. Add a local wallpaper and apply it
Apps and windows
Default applications
1. Default browser: Chrome recommended
2. Default editor: VS Code recommended
Window management
Desktop effects
1. Window shatter animation
2. Minimize animation (Magic Lamp)
Virtual desktops
1. Add Desktop 2
2. Cycle switching
3. Switch animation: Fade Desktop
Region and language
1. Language: Simplified Chinese, then reboot
Input method
1. See "Chinese Input Method" above
Software updates
1. Notification frequency: weekly
2. Apply system updates: after rebooting
Autostart
1. Add Yakuake (drop-down terminal)
Session
Background services
1. Disable the Welcome Center launcher
2. Disable the Plasma Browser Integration install reminder
Desktop session
1. Start with an empty session
Desktop Panels
Note: keep the stock default panel on the left, so apps pinned to the task manager appear on the left by default.
- Left: default panel (width 36, non-floating, dodge windows/always visible)
- Application launcher (change the icon)
- Icon-only task manager
- Margin separator
- Trash (added)
- Peek at desktop
- Top: default panel (width 30, non-floating, always visible)
- Application launcher (change the icon)
- Virtual desktop pager (Configure - General - display the desktop number as text)
- Global menu (added)
- Panel gap (added: panel settings - add spacer)
- Icon-only task manager
  icon spacing: large
  new tasks appear on the left
- Total CPU usage (added)
- Memory usage (added)
- Margin separator
- System tray
- Digital clock
  date shown left of the time
  date format: M月d日 ddd
- Peek at desktop
Terminal styling
Left: Konsole; right: Yakuake
- 1. Create a profile `ruoli-zsh` with an initial terminal size of `92x30` (recommended)
- 2. Font `JetBrainsMono Nerd Font` at `10pt` (Hack works too)
- 3. Background transparency between `15-30`; 20 recommended
App Tweaks
Konsole
1. Settings - Show toolbars
uncheck the main toolbar
uncheck the session toolbar
2. Configure Konsole
uncheck "remember window size"
Yakuake
- width 50, height 60
- position: far right
- install and apply the TabsOnly skin
- disable keep-above
- hide the window frame
Spectacle
1. Configure - Shortcuts
Launch Spectacle: set the global shortcut to the Print key
Arch Linux Software
Common tool packages
# netcat: nc -lp 7777
pacman -S gnu-netcat
Personal software layout
# install directory
mkdir -p ~/Programs
mkdir -p ~/Programs/AppImage
# desktop entries
mkdir -p ~/.local/share/applications
# autostart entries
mkdir -p ~/.config/autostart
Typora (example)
# unpack the tarball
tar -zxvf Typora-linux-x64-0.11.18.tar.gz
mv bin/Typora-linux-x64/ ~/Programs/Typora # rename here if you like
rm -rf bin
# desktop entry
vim ~/.local/share/applications/typora.desktop
# autostart (demo)
cp ~/.local/share/applications/typora.desktop ~/.config/autostart/
typora.desktop
[Desktop Entry]
Name=Typora
GenericName=Markdown Editor
Comment=A minimal Markdown reading & writing app
# the program
Exec=env GTK_IM_MODULE=fcitx QT_IM_MODULE=fcitx /home/ruoli/Programs/Typora/Typora
Icon=/home/ruoli/Programs/Typora/resources/assets/icon/icon_512x512.png
Terminal=false
Type=Application
Categories=Office;Development;
# supported MIME types
MimeType=text/markdown;text/x-markdown;
Development
Dev Environment
Environment directory
sudo mkdir /opt/module
sudo chown ruoli:ruoli /opt/module
JDK
## OpenJDK (worth installing)
pacman -S jdk-openjdk
## Oracle JDK
cd ~/Documents/tools/env
tar -zxvf jdk-8u201-linux-x64.tar.gz
mv jdk1.8.0_201 /opt/module/jdk
# environment variables
sudo vim /etc/environment
JAVA_HOME=/opt/module/jdk
sudo vim /etc/profile.d/myenv.sh
# JDK
export JAVA_HOME=/opt/module/jdk
export PATH=$PATH:$JAVA_HOME/bin
source /etc/profile
Maven
tar -zxvf apache-maven-3.9.6-bin.tar.gz
mv apache-maven-3.9.6 /opt/module/maven
# environment variables
sudo vim /etc/environment
M2_HOME=/opt/module/maven
sudo vim /etc/profile.d/myenv.sh
# Maven
export M2_HOME=/opt/module/maven
export PATH=$PATH:$M2_HOME/bin
source /etc/profile
# configuration; note: Maven reads ~/.m2/settings.xml (not setting.xml)
mkdir ~/.m2 && cp $M2_HOME/conf/settings.xml ~/.m2/settings.xml
vim ~/.m2/settings.xml
mvn -v
settings.xml
<localRepository>/home/ruoli/.m2/repository</localRepository>
<mirror>
<id>nexus-aliyun</id>
<mirrorOf>central</mirrorOf>
<name>public mirror</name>
<url>http://maven.aliyun.com/nexus/content/groups/public</url>
</mirror>
MySQL
# note: make sure no leftover data directory exists before installing
sudo pacman -S mysql
# initialize; watch the output, a temporary root password is printed
sudo mysqld --initialize --user=mysql --basedir=/usr --datadir=/var/lib/mysql
# start the service
sudo systemctl enable --now mysqld
mysql -u root -p
# SQL
ALTER USER 'root'@'localhost' IDENTIFIED BY 'root';
update mysql.user set host='%' where user='root';
FLUSH PRIVILEGES;
select host,user,plugin from mysql.user;
quit;
IDEA
# install by unpacking
tar -zxvf ideaIU-2021.3.3.tar.gz
mv idea-IU-213.7172.25 ~/Programs/idea2021
vim ~/.local/share/applications/idea.desktop
# activation steps
- 1. download the jetbra patch files
- 2. reference the patch from the vmoptions file
- 3. start the app and enter the key (details at www.exception.site)
cp -r jetbra /home/ruoli/Programs/
vim ~/Programs/idea2021/bin/idea64.vmoptions
--add-opens=java.base/jdk.internal.org.objectweb.asm=ALL-UNNAMED
--add-opens=java.base/jdk.internal.org.objectweb.asm.tree=ALL-UNNAMED
-javaagent:/home/ruoli/Programs/jetbra/ja-netfilter.jar=jetbrains
idea.desktop
[Desktop Entry]
Name=IntelliJ IDEA
GenericName=IntelliJ IDEA
Exec=/home/ruoli/Programs/idea2021/bin/idea.sh
Icon=/home/ruoli/Programs/idea2021/bin/idea.png
Type=Application
Categories=Development;
DataGrip
tar -zxvf datagrip-2021.3.4.tar.gz
mv DataGrip-2021.3.4 ~/Programs/datagrip2021
vim ~/.local/share/applications/datagrip.desktop
vim ~/Programs/datagrip2021/bin/datagrip64.vmoptions
--add-opens=java.base/jdk.internal.org.objectweb.asm=ALL-UNNAMED
--add-opens=java.base/jdk.internal.org.objectweb.asm.tree=ALL-UNNAMED
-javaagent:/home/ruoli/Programs/jetbra/ja-netfilter.jar=jetbrains
datagrip.desktop
[Desktop Entry]
Name=DataGrip
GenericName=DataGrip
Exec=/home/ruoli/Programs/datagrip2021/bin/datagrip.sh
Icon=/home/ruoli/Programs/datagrip2021/bin/datagrip.png
Type=Application
Categories=Development;
WebStorm
tar -zxvf WebStorm-2021.3.3.tar.gz
mv WebStorm-213.7172.31 ~/Programs/webstorm2021
vim ~/.local/share/applications/webstorm.desktop
vim ~/Programs/webstorm2021/bin/webstorm64.vmoptions
--add-opens=java.base/jdk.internal.org.objectweb.asm=ALL-UNNAMED
--add-opens=java.base/jdk.internal.org.objectweb.asm.tree=ALL-UNNAMED
-javaagent:/home/ruoli/Programs/jetbra/ja-netfilter.jar=jetbrains
webstorm.desktop
[Desktop Entry]
Name=WebStorm
GenericName=WebStorm
Exec=/home/ruoli/Programs/webstorm2021/bin/webstorm.sh
Icon=/home/ruoli/Programs/webstorm2021/bin/webstorm.png
Type=Application
Categories=Development;
Everyday Software
Chrome
# Google Chrome
paru -S google-chrome # arch4edu repo
VSCode
pacman -Rsn code # the code-oss build cannot coexist
paru -S visual-studio-code-bin # arch4edu repo
WPS
# WPS 365 (recommended)
paru -S wps-office-365
# WPS 2019
paru -S wps-office wps-office-mui-zh-cn ttf-wps-fonts
# fonts render too bold
paru -S freetype2-wps
QQ & WeChat
# official Linux builds; the sites also offer AppImage packages
paru -S linuxqq # AUR
paru -S wechat-universal-bwrap
# run 'wechat-universal --help' to see the accepted command-line arguments
# configure shared directories (absolute or relative paths both work)
mkdir -p ~/.config/wechat-universal
vim ~/.config/wechat-universal/binds.list
Desktop
Downloads
NetEase Cloud Music
# NetEase Cloud Music
paru -S netease-cloud-music
# third-party client (nicer UI, recommended)
paru -S yesplaymusic
# pacman -S osdlyrics
Baidu Netdisk
# Baidu Netdisk
paru -S baidunetdisk-bin
Steam
# Steam
pacman -S steam
# if fonts are too small
- enter Big Picture mode and leave it again
Motrix
# the official site provides an AppImage
mkdir -p ~/Programs/AppImage
cp -r ~/Documents/tools/AppImage/Motrix-1.8.19.AppImage ~/Programs/AppImage
cp -r ~/Documents/tools/AppImage/motrix-512x512.png ~/Programs/AppImage
vim ~/.local/share/applications/motrix.desktop
[Desktop Entry]
Name=Motrix
Comment=A download app
Exec=/home/ruoli/Programs/AppImage/Motrix-1.8.19.AppImage
Icon=/home/ruoli/Programs/AppImage/motrix-512x512.png
Type=Application
Categories=Office;Development;
Optional Apps
Sublime Text*
Official site: Sublime Text
curl -O https://download.sublimetext.com/sublimehq-pub.gpg && sudo pacman-key --add sublimehq-pub.gpg && sudo pacman-key --lsign-key 8A8F901A && rm sublimehq-pub.gpg
echo -e "\n[sublime-text]\nServer = https://download.sublimetext.com/arch/stable/x86_64" | sudo tee -a /etc/pacman.conf
sudo pacman -Sy sublime-text
Xunlei (Thunder)
# Thunder (does not exit cleanly; downloading works fine)
paru -S xunlei-bin
Ventoy
# Ventoy (bootable USB creator)
paru -S ventoy-bin
Minecraft
# Minecraft launcher
paru -S hmcl
Arch Commands
Common commands
# archives: tar (-z gzip, -x extract, -c create, -vf verbose + file); also unzip, unrar, 7z
tar -zxvf test.tar.gz # -z gzip, -j bzip2
tar -xvf test.tar.xz
tar -xvf test.tar
tar -zcvf test.tar.gz test/
# viewing files: less (scroll), more (page), head (start of file), tail (end)
more ~/.zshrc
tail ~/.zshrc
# filesystems (-h human-readable M/G units)
df -h
# memory and swap
free -h
cat /proc/sys/vm/swappiness # swap policy, 0-100; higher means swap more readily
vim /etc/sysctl.conf # note: not read by systemd; see below
vm.swappiness=60 # Arch defaults to 60
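On Arch, systemd does not load /etc/sysctl.conf at boot; it only reads /etc/sysctl.d/*.conf. To change the value both now and persistently:
# apply immediately, then persist via a drop-in file
sudo sysctl -w vm.swappiness=10
echo 'vm.swappiness=10' | sudo tee /etc/sysctl.d/99-swappiness.conf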
# file/directory sizes (-d depth)
sudo du -h -d 1 /usr
# tree view (-d directories only, -L depth)
tree -d -L 1 /usr
# watch a command's output continuously
watch -n 1 nvidia-smi
pacman
# search packages
pacman -Ss git
# install a package
pacman -S git
# show details of an installed package
pacman -Qi git
# remove a package together with its dependencies and config
pacman -Rsn git
# full system upgrade; rebooting afterwards is a good idea
pacman -Syu
# remove orphaned packages
pacman -Qtdq | sudo pacman -Rns -
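Old package versions in the cache also pile up over time; paccache from the pacman-contrib package trims them (it keeps the three newest versions by default):
sudo pacman -S pacman-contrib
paccache -r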
Git
Global Git configuration
###### global Git configuration (recommended) ######
git config --list
git config --global --unset xxx # delete a setting
# 1. identity; note: unrelated to your gitee/github account username and password
git config --global user.name "ruoli"
git config --global user.email "484158890@qq.com"
# 2. line-ending conversion
git config --global core.autocrlf input
# 3. remember credentials (stored after the first use)
git config --global credential.helper store
Basic usage
mkdir git-learn && cd git-learn
git init # initialize the repository (creates the .git directory)
touch README.md
printf ".idea\nout\ntarget\n" > .gitignore # plain echo would not expand \n here
git add --all
git commit --all -m "first commit"
# remote repository
git remote add origin https://gitee.com/yuruoli/git-learn.git
git pull # merging before pushing is a good habit
git push origin master # push
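On the first push, -u records the upstream so that plain git pull/git push work afterwards:
git push -u origin master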
Common Git commands
###### everyday Git ######
git add
git commit
git pull
git push
# log
git log
# list branches
git branch
# remotes
# origin and master are the defaults when unspecified
git remote -v
git remote add origin https://gitee.com/yuruoli/git-learn.git
# fetch without merging
git fetch
git fetch origin master
# pull from the remote into the local branch
git pull
git pull origin master
# push the local branch to the remote
git push
git push origin master
Vim
Normal mode
Vim starts in Normal mode
h # cursor left
j # cursor down
k # cursor up
l # cursor right
n + h # move left n characters
u # undo
yy # yank (copy) the current line
dd # cut the current line
n + yy # yank n lines
p # paste below the cursor line
/alias # search for and highlight alias
n # next match
N # previous match
Cmd-line mode
From Normal mode press : to enter Cmd-line mode; ESC returns to Normal mode
w # save
q # quit
wq # save and quit
q! # force quit without saving
nohl # clear search highlighting
Edit mode
From Normal mode press i or o to enter Insert mode; ESC returns to Normal mode
Hadoop Environment
Passwordless SSH
# the .ssh directory is created automatically
ssh-keygen -t rsa
ssh-copy-id ruoli-arch
Installation directory
sudo mkdir /opt/module
sudo chown ruoli:ruoli /opt/module
Hadoop
Installation (pseudo-distributed)
tar -zxvf hadoop-3.3.4.tar.gz
mv hadoop-3.3.4 /opt/module/hadoop
# 2. environment variables
sudo vim /etc/profile.d/myenv.sh
# Hadoop
export HADOOP_HOME=/opt/module/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
source /etc/profile
# 3. Hadoop config files
cd $HADOOP_HOME/etc/hadoop
vim core-site.xml
vim hdfs-site.xml
vim yarn-site.xml
vim mapred-site.xml
vim workers
vim capacity-scheduler.xml
# 4. format the NameNode
hdfs namenode -format
Common commands
# start HDFS
start-dfs.sh
# start YARN
start-yarn.sh
# start the history server
mapred --daemon start historyserver
################### hdfs ##########################
# Usage: hadoop fs [generic options]
# create a directory
hadoop fs -mkdir -p /input
# upload
hadoop fs -put ~/input/word.txt /input
# append
hadoop fs -appendToFile ~/input/word2.txt /input/word.txt
# download
hadoop fs -get /input/word.txt
# inspection commands mirror Linux: ls mv cp cat rm ...
hadoop fs -ls /input
hadoop fs -cat /input/word.txt
hadoop fs -chown ruoli:ruoli /input/word.txt
hadoop fs -chmod 777 /input/word.txt
hadoop fs -rm -r /input
# run a MapReduce job; both paths below are HDFS paths
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.4.jar wordcount /input /output
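The job writes its results into the output directory; a quick way to inspect them (_SUCCESS marks completion, part-r-* files hold the reducer output):
hadoop fs -cat /output/part-r-*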
core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- NameNode address -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://ruoli-arch:8020</value>
</property>
<!-- Hadoop data storage directory -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/module/hadoop/data</value>
</property>
<!-- static user ruoli for the HDFS web UI login -->
<property>
<name>hadoop.http.staticuser.user</name>
<value>ruoli</value>
</property>
<!-- hosts from which the proxy user ruoli (superuser) may connect -->
<property>
<name>hadoop.proxyuser.ruoli.hosts</name>
<value>*</value>
</property>
<!-- groups whose users ruoli (superuser) may impersonate -->
<property>
<name>hadoop.proxyuser.ruoli.groups</name>
<value>*</value>
</property>
<!-- users that ruoli (superuser) may impersonate -->
<property>
<name>hadoop.proxyuser.ruoli.users</name>
<value>*</value>
</property>
</configuration>
hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- NameNode web UI address -->
<property>
<name>dfs.namenode.http-address</name>
<value>ruoli-arch:9870</value>
</property>
<!-- SecondaryNameNode web UI address -->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>ruoli-arch:9868</value>
</property>
<!-- replication factor 1 for a test environment -->
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
yarn-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- enable the MapReduce shuffle service -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- ResourceManager address -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>ruoli-arch</value>
</property>
<!-- environment variable inheritance -->
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
<!-- min/max memory a single YARN container may be allocated -->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>512</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>6144</value>
</property>
<!-- max vcores a single YARN container may be allocated -->
<property>
<name>yarn.scheduler.maximum-allocation-vcores</name>
<value>8</value>
</property>
<!-- physical memory managed by the NodeManager -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>6144</value>
</property>
<!-- CPU cores managed by the NodeManager -->
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>8</value>
</property>
<!-- disable YARN's physical/virtual memory limit checks -->
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<!-- enable log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- log aggregation server address -->
<property>
<name>yarn.log.server.url</name>
<value>http://ruoli-arch:19888/jobhistory/logs</value>
</property>
<!-- keep aggregated logs for 7 days -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
</configuration>
mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- run MapReduce on YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- container resources requested by a single Map task -->
<property>
<name>mapreduce.map.memory.mb</name>
<value>1024</value>
</property>
<property>
<name>mapreduce.map.cpu.vcores</name>
<value>1</value>
</property>
<!-- container resources requested by a single Reduce task -->
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>1024</value>
</property>
<property>
<name>mapreduce.reduce.cpu.vcores</name>
<value>1</value>
</property>
<!-- history server address -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>ruoli-arch:10020</value>
</property>
<!-- history server web UI address -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>ruoli-arch:19888</value>
</property>
</configuration>
workers
ruoli-arch
capacity-scheduler.xml
<property>
<name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
<value>0.8</value>
</property>
hdp.sh start/stop script
vim ~/bin/hdp.sh
chmod 755 ~/bin/hdp.sh
#!/bin/bash
if [ $# -lt 1 ]; then
  echo "No Args Input..."
  exit
fi
case $1 in
"start")
  echo " =================== starting the hadoop cluster ==================="
  echo " --------------- starting hdfs ---------------"
  ssh ruoli-arch "/opt/module/hadoop/sbin/start-dfs.sh"
  echo " --------------- starting yarn ---------------"
  ssh ruoli-arch "/opt/module/hadoop/sbin/start-yarn.sh"
  echo " --------------- starting historyserver ---------------"
  ssh ruoli-arch "/opt/module/hadoop/bin/mapred --daemon start historyserver"
  ;;
"stop")
  echo " =================== stopping the hadoop cluster ==================="
  echo " --------------- stopping historyserver ---------------"
  ssh ruoli-arch "/opt/module/hadoop/bin/mapred --daemon stop historyserver"
  echo " --------------- stopping yarn ---------------"
  ssh ruoli-arch "/opt/module/hadoop/sbin/stop-yarn.sh"
  echo " --------------- stopping hdfs ---------------"
  ssh ruoli-arch "/opt/module/hadoop/sbin/stop-dfs.sh"
  ;;
*)
  echo "Input Args Error..."
  ;;
esac
Zookeeper
Installation (standalone)
tar -zxvf apache-zookeeper-3.7.1-bin.tar.gz
mv apache-zookeeper-3.7.1-bin/ /opt/module/zookeeper
sudo vim /etc/profile.d/myenv.sh
# Zookeeper
export ZOOKEEPER_HOME=/opt/module/zookeeper
export PATH=$PATH:$ZOOKEEPER_HOME/bin
source /etc/profile
# 3. config files
mkdir -p $ZOOKEEPER_HOME/zkData
echo '1' > $ZOOKEEPER_HOME/zkData/myid
mv $ZOOKEEPER_HOME/conf/zoo_sample.cfg $ZOOKEEPER_HOME/conf/zoo.cfg
vim $ZOOKEEPER_HOME/conf/zoo.cfg
dataDir=/opt/module/zookeeper/zkData
clientPort=2181
Common commands
# usually a cluster: start it on every server, typically via a script
zkServer.sh start
zkServer.sh stop
zkServer.sh status
# start the client
zkCli.sh
# inspect znodes
ls /
get /zookeeper
quit
zoo.cfg
# edit
dataDir=/opt/module/zookeeper/zkData
clientPort=2181
# 2182 2183 for the other instances
# add
####################### cluster ##########################
server.2=ruoli-arch:2888:3888
server.3=ruoli-arch:2889:3889
server.4=ruoli-arch:2890:3890
zk.sh start/stop script
vim ~/bin/zk.sh
chmod 755 ~/bin/zk.sh
#!/bin/bash
case $1 in
"start") {
  for i in ruoli-arch; do
    echo ---------- zookeeper $i start ------------
    ssh $i "/opt/module/zookeeper/bin/zkServer.sh start"
  done
} ;;
"stop") {
  for i in ruoli-arch; do
    echo ---------- zookeeper $i stop ------------
    ssh $i "/opt/module/zookeeper/bin/zkServer.sh stop"
  done
} ;;
"status") {
  for i in ruoli-arch; do
    echo ---------- zookeeper $i status ------------
    ssh $i "/opt/module/zookeeper/bin/zkServer.sh status"
  done
} ;;
esac
Kafka
Installation (standalone)
tar -zxvf kafka_2.12-3.3.1.tgz
mv kafka_2.12-3.3.1/ /opt/module/kafka
# 2. environment variables
sudo vim /etc/profile.d/myenv.sh
# Kafka
export KAFKA_HOME=/opt/module/kafka
export PATH=$PATH:$KAFKA_HOME/bin
source /etc/profile
# 3. config file
vim $KAFKA_HOME/config/server.properties
# globally unique broker id
broker.id=0
# IP and port the broker advertises to clients
advertised.listeners=PLAINTEXT://ruoli-arch:9092
# where kafka keeps its log (data) segments
log.dirs=/opt/module/kafka/datas
# partition count; usually set to the number of CPU cores
num.partitions=8
# Zookeeper connection string (chroot /kafka keeps things tidy)
zookeeper.connect=ruoli-arch:2181/kafka
Common commands
# create a topic
kafka-topics.sh --bootstrap-server ruoli-arch:9092 --create --topic test
# list topics
kafka-topics.sh --bootstrap-server ruoli-arch:9092 --list
# console producer
kafka-console-producer.sh --bootstrap-server ruoli-arch:9092 --topic test
# console consumer
kafka-console-consumer.sh --bootstrap-server ruoli-arch:9092 --topic test
--group # consumer group id
--partition 0
--offset earliest
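A quick round trip to check the broker: type lines into the producer above, then replay the topic from the start with:
kafka-console-consumer.sh --bootstrap-server ruoli-arch:9092 --topic test --from-beginning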
kf.sh start/stop script
vim ~/bin/kf.sh
chmod 755 ~/bin/kf.sh
#! /bin/bash
case $1 in
"start") {
  for i in ruoli-arch; do
    echo " --------starting Kafka on $i-------"
    ssh $i "/opt/module/kafka/bin/kafka-server-start.sh -daemon /opt/module/kafka/config/server.properties"
  done
} ;;
"stop") {
  for i in ruoli-arch; do
    echo " --------stopping Kafka on $i-------"
    ssh $i "/opt/module/kafka/bin/kafka-server-stop.sh "
  done
} ;;
esac
Hive
Installation
tar -zxvf hive-3.1.3.tar.gz
mv apache-hive-3.1.3-bin/ /opt/module/hive
# environment variables
sudo vim /etc/profile.d/myenv.sh
# Hive
export HIVE_HOME=/opt/module/hive
export PATH=$PATH:$HIVE_HOME/bin
source /etc/profile
# config files
cd /opt/module/hive/conf
mv hive-log4j2.properties.template hive-log4j2.properties
touch hive-site.xml log4j.properties
mv hive-env.sh.template hive-env.sh
vim hive-site.xml
vim hive-log4j2.properties
vim log4j.properties # tames hive's overly chatty console logging
# heap size
vim hive-env.sh
# conflicting logging jar
mv $HIVE_HOME/lib/log4j-slf4j-impl-2.17.1.jar $HIVE_HOME/lib/log4j-slf4j-impl-2.17.1.jar.bak
cp mysql/mysql-connector-j-8.0.31.jar $HIVE_HOME/lib/
mysql -uroot -proot
# SQL
create database metastore;
quit;
# initialize the Hive metastore schema
schematool -initSchema -dbType mysql -verbose
mysql -uroot -proot
# SQL: allow utf8 in comments and parameters
use metastore;
alter table COLUMNS_V2 modify column COMMENT varchar(256) character set utf8;
alter table TABLE_PARAMS modify column PARAM_VALUE mediumtext character set utf8;
quit;
Common commands
# Hadoop must be running
hdp.sh start
# remote clients need the hiveserver2 and metastore services (see hv.sh below)
# enter the CLI client
hive
# HQL
set hive.execution.engine=mr;
show databases;
create database bigdata;
create table student(id int, name string);
insert into student values(1, "mapreduce");
select * from student;
quit;
hive -e "select * from student;"
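With hiveserver2 running, beeline can connect over JDBC as an alternative client:
beeline -u jdbc:hive2://ruoli-arch:10000 -n ruoli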
Hive SQL
-- create a database
CREATE DATABASE [IF NOT EXISTS] database_name
[COMMENT database_comment]
[LOCATION hdfs_path]
[WITH DBPROPERTIES (property_name=property_value, ...)];
-- create a table
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
[(col_name data_type [COMMENT col_comment], ...)]
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
[CLUSTERED BY (col_name, col_name, ...)
[SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
[ROW FORMAT row_format]
[STORED AS file_format]
[LOCATION hdfs_path]
[TBLPROPERTIES (property_name=property_value, ...)]
-- load
LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)];
-- insert
INSERT (INTO | OVERWRITE) TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement;
-- export
EXPORT TABLE tablename TO 'export_target_path'
-- import
IMPORT [EXTERNAL] TABLE new_or_original_tablename FROM 'source_path' [LOCATION 'import_target_path']
-- query
SELECT [ALL | DISTINCT] select_expr, select_expr, ...
FROM table_reference -- which table to read
[WHERE where_condition] -- filter
[GROUP BY col_list] -- group
[HAVING col_list] -- filter after grouping
[ORDER BY col_list] -- sort
[CLUSTER BY col_list | [DISTRIBUTE BY col_list] [SORT BY col_list]]
[LIMIT number] -- cap the number of rows returned
hive-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- MySQL URL for the Hive metastore -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://ruoli-arch:3306/metastore?useSSL=false&amp;useUnicode=true&amp;characterEncoding=UTF-8&amp;allowPublicKeyRetrieval=true</value>
</property>
<!-- fully qualified JDBC driver class for the MySQL connection -->
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.cj.jdbc.Driver</value>
</property>
<!-- MySQL username -->
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<!-- MySQL password -->
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>root</value>
</property>
<!-- Hive's default warehouse directory on HDFS -->
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/hive/warehouse</value>
</property>
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
<!-- host for hiveserver2 connections -->
<property>
<name>hive.server2.thrift.bind.host</name>
<value>ruoli-arch</value>
</property>
<!-- port for hiveserver2 connections -->
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
</property>
<property>
<name>hive.metastore.event.db.notification.api.auth</name>
<value>false</value>
</property>
<!-- show the current database and column headers in the CLI -->
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
<!-- Hive execution engine -->
<property>
<name>hive.execution.engine</name>
<value>spark</value>
</property>
<!-- location of the Spark jars -->
<property>
<name>spark.yarn.jars</name>
<value>hdfs://ruoli-arch:8020/spark/spark-jars/*</value>
</property>
</configuration>
hive-log4j2.properties
property.hive.log.dir=/opt/module/hive/logs
hive-env.sh
export HADOOP_HEAPSIZE=2048
log4j.properties
log4j.rootLogger=WARN, CA
log4j.appender.CA=org.apache.log4j.ConsoleAppender
log4j.appender.CA.layout=org.apache.log4j.PatternLayout
log4j.appender.CA.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
hv.sh start/stop script
vim ~/bin/hv.sh
chmod 755 ~/bin/hv.sh
#!/bin/bash
HIVE_LOG_DIR=$HIVE_HOME/logs
if [ ! -d $HIVE_LOG_DIR ]; then
  mkdir -p $HIVE_LOG_DIR
fi
# check whether a process is healthy; $1 = process name, $2 = port
function check_process() {
  pid=$(ps -ef 2>/dev/null | grep -v grep | grep -i $1 | awk '{print $2}')
  # ppid=$(netstat -nltp 2>/dev/null | grep $2 | awk '{print $7}' | cut -d '/' -f 1)
  ppid=$(ss -nltp 2>/dev/null | grep $2 | awk '{print $6}' | cut -d '=' -f 2 | cut -d ',' -f 1)
  echo $pid
  [[ "$pid" =~ "$ppid" ]] && [ "$ppid" ] && return 0 || return 1
}
function hive_start() {
  metapid=$(check_process HiveMetastore 9083)
  cmd="nohup hive --service metastore >$HIVE_LOG_DIR/metastore.log 2>&1 &"
  [ -z "$metapid" ] && eval $cmd && echo "Metastore starting" || echo "Metastore already running"
  server2pid=$(check_process HiveServer2 10000)
  cmd="nohup hive --service hiveserver2 >$HIVE_LOG_DIR/hiveServer2.log 2>&1 &"
  [ -z "$server2pid" ] && eval $cmd && echo "HiveServer2 starting" || echo "HiveServer2 already running"
}
function hive_stop() {
  metapid=$(check_process HiveMetastore 9083)
  [ "$metapid" ] && kill $metapid && echo "Metastore stopping" || echo "Metastore not running"
  server2pid=$(check_process HiveServer2 10000)
  [ "$server2pid" ] && kill $server2pid && echo "HiveServer2 stopping" || echo "HiveServer2 not running"
}
case $1 in
"start")
  hive_start
  sleep 4
  ;;
"stop")
  hive_stop
  sleep 2
  ;;
"restart")
  hive_stop
  sleep 2
  hive_start
  sleep 4
  ;;
"status")
  check_process HiveMetastore 9083 >/dev/null && echo "Metastore is running" || echo "Metastore is not running"
  check_process HiveServer2 10000 >/dev/null && echo "HiveServer2 is running" || echo "HiveServer2 is not running"
  ;;
*)
  echo Invalid Args!
  echo 'Usage: '$(basename $0)' start|stop|restart|status'
  ;;
esac
Spark
Installation
tar -zxvf spark-3.3.1-bin-hadoop3.tgz
mv spark-3.3.1-bin-hadoop3/ /opt/module/spark
# environment variables
sudo vim /etc/profile.d/myenv.sh
# Spark
export SPARK_HOME=/opt/module/spark
export PATH=$PATH:$SPARK_HOME/bin
export PATH=$PATH:$SPARK_HOME/sbin
source /etc/profile
# test: local mode works out of the box
spark-shell
scala> // master = local[*], reads a local relative path
sc.textFile("input/word.txt").flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_).collect
# configure Spark on YARN
# conflicting logging jar
mv $SPARK_HOME/jars/log4j-slf4j-impl-2.17.2.jar $SPARK_HOME/jars/log4j-slf4j-impl-2.17.2.jar.bak
hadoop fs -mkdir -p /spark/history
cd $SPARK_HOME/conf
mv spark-env.sh.template spark-env.sh
mv spark-defaults.conf.template spark-defaults.conf
vim spark-env.sh
vim spark-defaults.conf
# test
hadoop fs -mkdir -p /user/ruoli/input
hadoop fs -put ~/input/word.txt /user/ruoli/input
spark-shell
scala> // master = yarn now, reads an HDFS path
sc.textFile("input/word.txt").flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_).collect
Hive on Spark
# Hive on Spark
tar -zxvf spark-3.3.1-bin-without-hadoop.tgz
hadoop fs -mkdir -p /spark/spark-jars
hadoop fs -put spark-3.3.1-bin-without-hadoop/jars/* /spark/spark-jars
rm -rf spark-3.3.1-bin-without-hadoop
vim $SPARK_HOME/conf/spark-env.sh
# Hive on Spark
export SPARK_DIST_CLASSPATH=$(hadoop classpath)
cp $SPARK_HOME/conf/spark-defaults.conf $HIVE_HOME/conf/
vim $HIVE_HOME/conf/hive-site.xml
<!-- Hive execution engine -->
<property>
<name>hive.execution.engine</name>
<value>spark</value>
</property>
<!-- location of the Spark jars -->
<property>
<name>spark.yarn.jars</name>
<value>hdfs://ruoli-arch:8020/spark/spark-jars/*</value>
</property>
# test
hive
# HQL
insert into student values(2, "spark");
select * from student;
quit;
Common commands
# start/stop the history server
start-history-server.sh
stop-history-server.sh
# 1. local mode
spark-submit --class org.apache.spark.examples.SparkPi --master local $SPARK_HOME/examples/jars/spark-examples_2.12-3.3.1.jar 10
# 2. YARN mode, deploy-mode cluster/client
# client: the Driver runs on the submitting machine; good for testing
spark-submit --class org.apache.spark.examples.SparkPi --master yarn --deploy-mode client $SPARK_HOME/examples/jars/spark-examples_2.12-3.3.1.jar 10
# cluster: the Driver runs inside the ApplicationMaster started by the ResourceManager; good for production
spark-submit --class org.apache.spark.examples.SparkPi --master yarn --deploy-mode cluster $SPARK_HOME/examples/jars/spark-examples_2.12-3.3.1.jar 10
Spark SQL
-- SQL-style syntax (primary)
-- DSL-style syntax (secondary)
spark-env.sh
HADOOP_CONF_DIR=/opt/module/hadoop/etc/hadoop
YARN_CONF_DIR=/opt/module/hadoop/etc/hadoop
# Spark history server
export SPARK_HISTORY_OPTS="
-Dspark.history.ui.port=18080
-Dspark.history.fs.logDirectory=hdfs://ruoli-arch:8020/spark/history
-Dspark.history.retainedApplications=30"
# silences a native-library warning
export LD_LIBRARY_PATH=/opt/module/hadoop/lib/native
# Hive on Spark
export SPARK_DIST_CLASSPATH=$(hadoop classpath)
spark-defaults.conf
spark.master yarn
spark.eventLog.enabled true
spark.eventLog.dir hdfs://ruoli-arch:8020/spark/history
spark.driver.memory 2g
spark.executor.memory 2g
# link from the YARN UI to the Spark history server
spark.yarn.historyServer.address ruoli-arch:18080
spark.history.ui.port 18080
Flink
Installation
tar -zxvf flink-1.17.1-bin-scala_2.12.tgz
mv flink-1.17.1/ /opt/module/flink
# environment variables (Flink on YARN)
sudo vim /etc/profile.d/myenv.sh
# Flink
export FLINK_HOME=/opt/module/flink
export PATH=$PATH:$FLINK_HOME/bin
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HADOOP_CLASSPATH=`hadoop classpath`
source /etc/profile
# config files
cd /opt/module/flink/conf/
vim flink-conf.yaml
vim masters
vim workers
# conflicting logging jar
mv $FLINK_HOME/lib/log4j-slf4j-impl-2.17.1.jar $FLINK_HOME/lib/log4j-slf4j-impl-2.17.1.jar.bak
Common commands
# cluster start/stop (Standalone mode only)
start-cluster.sh
stop-cluster.sh
# history server start/stop
historyserver.sh start
historyserver.sh stop
Flink jobs run in two ways: Standalone and on YARN (the important one)
####### YARN (recommended) #######
# session mode (good for many small jobs)
yarn-session.sh -d -nm test
flink run -c com.ruoli.flink.learn.app.wc.SocketStreamWordCount ~/input/flink-learn-1.0.jar
# per-job mode
flink run -d -t yarn-per-job -c com.ruoli.flink.learn.app.wc.SocketStreamWordCount ~/input/flink-learn-1.0.jar
# application mode (common)
flink run-application -t yarn-application -c com.ruoli.flink.learn.app.wc.SocketStreamWordCount ~/input/flink-learn-1.0.jar
####### Standalone (for reference) #######
# session mode (requires a running cluster)
start-cluster.sh
stop-cluster.sh
flink run -m ruoli-arch:8081 -c com.ruoli.flink.learn.app.wc.SocketStreamWordCount ~/input/flink-learn-1.0.jar
# per-job mode
# not supported in Standalone
# application mode
cp ~/input/flink-learn-1.0.jar $FLINK_HOME/lib/
# start the JobManager (shuts down when the job finishes)
standalone-job.sh start --job-classname com.ruoli.flink.learn.app.wc.SocketStreamWordCount
# start a TaskManager; the job actually starts running now
taskmanager.sh start
# stop the taskmanager once the job is done
taskmanager.sh stop
Flink SQL
Dynamic tables, continuous queries
# start Flink in yarn-session mode
bin/yarn-session.sh -d
# SQL client
bin/sql-client.sh embedded -s yarn-session
# start with an init file; Flink tables are dynamic tables (continuous queries) and vanish when the session stops
bin/sql-client.sh embedded -s yarn-session -i conf/sql-client-init.sql
#### sql-client-init.sql ####
SET sql-client.execution.result-mode=tableau;
CREATE DATABASE mydatabase;
Flink SQL>quit;
Configuration
# default is table; tableau and changelog are also available
SET sql-client.execution.result-mode=tableau;
# default is streaming; batch is also possible
SET execution.runtime-mode=streaming;
# parallelism
SET parallelism.default=1;
# state TTL
SET table.exec.state.ttl=1000;
DDL
-- create a database
CREATE DATABASE [IF NOT EXISTS] [catalog_name.]db_name
[COMMENT database_comment]
WITH (key1=val1, key2=val2, ...)
ALTER DATABASE [catalog_name.]db_name SET (key1=val1, key2=val2, ...)
DROP DATABASE [IF EXISTS] [catalog_name.]db_name [ (RESTRICT | CASCADE) ]
USE database_name;
-- create a table
CREATE TABLE [IF NOT EXISTS] [catalog_name.][db_name.]table_name
(
{ <physical_column_definition> | <metadata_column_definition> | <computed_column_definition> }[ , ...n]
[ <watermark_definition> ]
[ <table_constraint> ][ , ...n]
)
[COMMENT table_comment]
[PARTITIONED BY (partition_column_name1, partition_column_name2, ...)]
WITH (key1=val1, key2=val2, ...)
[ LIKE source_table [( <like_options> )] | AS select_query ]
-- the WITH options are defined by each Flink SQL connector (the link to external storage); every connector has its own set
-- metadata_column_definition
-- metadata columns are a SQL-standard extension giving access to metadata carried by the source itself
`record_time` TIMESTAMP_LTZ(3) METADATA FROM 'timestamp'
-- watermark_definition
-- 1. strictly ascending:
WATERMARK FOR rowtime_column AS rowtime_column
-- 2. ascending:
WATERMARK FOR rowtime_column AS rowtime_column - INTERVAL '0.001' SECOND
-- 3. bounded out-of-orderness: sets the maximum disorder; the usual strategy. Here events may arrive up to 5s late
WATERMARK FOR rowtime_column AS rowtime_column - INTERVAL '5' timeUnit
-- table_constraint: primary keys are supported
PRIMARY KEY(user_id) NOT ENFORCED
ALTER TABLE table_name RENAME TO new_table_name
ALTER TABLE table_name SET (key1=val1, key2=val2, ...)
DROP TABLE [IF EXISTS] table_name
-- event time + WATERMARK
CREATE TABLE EventTable(
user STRING,
url STRING,
ts TIMESTAMP(3),
WATERMARK FOR ts AS ts - INTERVAL '5' SECOND
) WITH (
...
);
-- processing time: AS PROCTIME()
CREATE TABLE ProcessTable(
user STRING,
url STRING,
ts AS PROCTIME()
) WITH (
...
);
-------------------- WITH connector examples --------------------
-- kafka
-- plain Kafka table
CREATE TABLE t1(
`event_time` TIMESTAMP(3) METADATA FROM 'timestamp',
-- FROM 'xxxx' may be omitted when the column name matches the metadata key; VIRTUAL marks it read-only
`partition` BIGINT METADATA VIRTUAL,
`offset` BIGINT METADATA VIRTUAL,
id int,
ts bigint ,
vc int
) WITH (
'connector' = 'kafka',
'properties.bootstrap.servers' = 'hadoop103:9092',
'properties.group.id' = 'yuruoli',
-- 'earliest-offset', 'latest-offset', 'group-offsets', 'timestamp' and 'specific-offsets'
'scan.startup.mode' = 'earliest-offset',
-- fixed: Flink's own partitioner; each parallel task writes to a single kafka partition
'sink.partitioner' = 'fixed',
'topic' = 'ws1',
'format' = 'json'
)
-- upsert-kafka table
CREATE TABLE t2(
id int ,
sumVC int ,
primary key (id) NOT ENFORCED
)
WITH (
'connector' = 'upsert-kafka',
'properties.bootstrap.servers' = 'hadoop102:9092',
'topic' = 'ws2',
'key.format' = 'json',
'value.format' = 'json'
)
-- file
CREATE TABLE t3( id int, ts bigint , vc int )
WITH (
'connector' = 'filesystem',
'path' = 'hdfs://hadoop102:8020/data/t3',
'format' = 'csv'
)
--jdbc
-- the MySQL-side table (created in MySQL)
CREATE TABLE `ws2` (
`id` int(11) NOT NULL,
`ts` bigint(20) DEFAULT NULL,
`vc` int(11) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8
CREATE TABLE t4(
id INT,
ts BIGINT,
vc INT,
PRIMARY KEY (id) NOT ENFORCED
) WITH (
'connector'='jdbc',
'url' = 'jdbc:mysql://hadoop102:3306/test?useUnicode=true&characterEncoding=UTF-8',
'username' = 'root',
'password' = 'root',
'connection.max-retry-timeout' = '60s',
'table-name' = 'ws2',
'sink.buffer-flush.max-rows' = '500',
'sink.buffer-flush.interval' = '5s',
'sink.max-retries' = '3',
'sink.parallelism' = '1'
);
DQL
Flink SQL queries
stream => table
- update query: select count(*) from test;
- append query: select * from test;
table => stream
- append-only stream
- retract stream
- upsert stream
Create the tables from the DQL examples section first to make testing easier
select * from source;
INSERT INTO sink select * from source; -- this starts a job
select * from sink;
---------------------- group-window aggregation (superseded by TVFs) ----------------------
-- SQL supports only time-based windows, not count-based ones.
TUMBLE(time_attr, interval) -- tumbling window
HOP(time_attr, interval, interval) -- sliding window: slide step, window size
SESSION(time_attr, interval) -- session window
select
id,
TUMBLE_START(et, INTERVAL '5' SECOND) wstart,
TUMBLE_END(et, INTERVAL '5' SECOND) wend,
sum(vc) sumVc
from ws
group by id, TUMBLE(et, INTERVAL '5' SECOND);
-- window table-valued function (TVF) aggregation
FROM TABLE(
window_type(TABLE table_name, DESCRIPTOR(time_column), INTERVAL ...)
)
GROUP BY [window_start,][window_end,] -- optional
SELECT
window_start,
window_end,
id,
SUM(vc) sumVC
FROM TABLE(
TUMBLE(TABLE ws, DESCRIPTOR(et), INTERVAL '5' SECONDS)
--HOP(TABLE ws, DESCRIPTOR(et), INTERVAL '5' SECONDS , INTERVAL '10' SECONDS)
-- cumulate window: a 6s window computed every 2s
-- CUMULATE(TABLE ws, DESCRIPTOR(et), INTERVAL '2' SECONDS , INTERVAL '6' SECONDS)
)
GROUP BY window_start, window_end, id;
---------------------- OVER ----------------------
SELECT
agg_func(agg_col) OVER (
[PARTITION BY col1[, col2, ...]]
ORDER BY time_col
range_definition),
...
FROM ...
-- time-based range
SELECT
id, et, vc,
count(vc) OVER (
PARTITION BY id
ORDER BY et
RANGE BETWEEN INTERVAL '10' SECOND PRECEDING AND CURRENT ROW
) AS cnt
FROM ws
-- WINDOW clause
SELECT
id, et, vc,
count(vc) OVER w AS cnt,
sum(vc) OVER w AS sumVC
FROM ws
WINDOW w AS (
PARTITION BY id
ORDER BY et
RANGE BETWEEN INTERVAL '10' SECOND PRECEDING AND CURRENT ROW
)
-- row-based range
SELECT
id, et, vc,
avg(vc) OVER w AS avgVC,
count(vc) OVER w AS cnt
FROM ws
WINDOW w AS (
PARTITION BY id
ORDER BY et
ROWS BETWEEN 5 PRECEDING AND CURRENT ROW
)
---------------------- TOP-N ----------------------
SELECT [column_list]
FROM (
SELECT [column_list],
ROW_NUMBER() OVER ([PARTITION BY col1[, col2...]]
ORDER BY col1 [asc|desc][, col2 [asc|desc]...]) AS rownum
FROM table_name)
WHERE rownum <= N [AND conditions]
select
id, et, vc, rownum
from (
select
id, et, vc,
row_number() over(
partition by id
order by vc desc
) as rownum
from ws
)
where rownum<=3;
---------------------- Deduplication ----------------------
SELECT [column_list]
FROM (
SELECT [column_list],
ROW_NUMBER() OVER ([PARTITION BY col1[, col2...]] ORDER BY time_attr [asc|desc]) AS rownum
FROM table_name
)
WHERE rownum = 1
select
id, et, vc,
rownum
from (
select id,et,vc,
row_number() over(
partition by id,vc
order by et
) as rownum
from ws
)
where rownum=1;
-------------------------- JOIN --------------------------
------ Regular Join ------
-- INNER Equi-JOIN
SELECT ws.id, ws.vc, ws1.id, ws1.vc
FROM ws
INNER JOIN ws1
ON ws.id = ws1.id
-- OUTER Equi-JOIN
SELECT ws.id, ws.vc, ws1.id, ws1.vc
FROM ws
LEFT JOIN ws1
ON ws.id = ws1.id
SELECT ws.id, ws.vc, ws1.id, ws1.vc
FROM ws
RIGHT JOIN ws1
ON ws.id = ws1.id
SELECT ws.id, ws.vc, ws1.id, ws1.vc
FROM ws
FULL OUTER JOIN ws1
ON ws.id = ws1.id
------ Interval Join ------
-- interval joins do not use the JOIN keyword
SELECT *
FROM ws,ws1
WHERE ws.id = ws1.id
AND ws.et BETWEEN ws1.et - INTERVAL '2' SECOND AND ws1.et + INTERVAL '2' SECOND
------ Lookup Join ------
-- a Lookup Join joins the stream against external storage such as Redis, MySQL, or HBase; only processing-time attributes are supported
table_a
JOIN dim_table FOR SYSTEM_TIME AS OF table_a.proc_time AS alias
ON table_a.field = alias.field
CREATE TABLE Customers (
id INT,
name STRING,
country STRING,
zip STRING
) WITH (
'connector' = 'jdbc',
'url' = 'jdbc:mysql://hadoop102:3306/customerdb',
'table-name' = 'customers'
);
-- every row arriving on the Orders stream looks up dimension data in MySQL's customers table
SELECT o.order_id, o.total, c.country, c.zip
FROM Orders AS o
JOIN Customers FOR SYSTEM_TIME AS OF o.proc_time AS c
ON o.customer_id = c.id;
-- ORDER BY
-- in streaming jobs, ORDER BY must sort by a time attribute first, in ascending order
SELECT *
FROM ws
ORDER BY et, id desc
-- UNION and UNION ALL
-- UNION: merge the sets and deduplicate
-- UNION ALL: merge without deduplicating
(SELECT id FROM ws) UNION (SELECT id FROM ws1);
(SELECT id FROM ws) UNION ALL (SELECT id FROM ws1);
-- INTERSECT and INTERSECT ALL
-- INTERSECT: intersection, deduplicated
-- INTERSECT ALL: intersection without deduplication
(SELECT id FROM ws) INTERSECT (SELECT id FROM ws1);
(SELECT id FROM ws) INTERSECT ALL (SELECT id FROM ws1);
-- EXCEPT and EXCEPT ALL
-- EXCEPT: difference, deduplicated
-- EXCEPT ALL: difference without deduplication
(SELECT id FROM ws) EXCEPT (SELECT id FROM ws1);
(SELECT id FROM ws) EXCEPT ALL (SELECT id FROM ws1);
DQL examples
CREATE TABLE source (
id INT,
ts BIGINT,
vc INT
) WITH (
'connector' = 'datagen',
'rows-per-second'='1',
'fields.id.kind'='random',
'fields.id.min'='1',
'fields.id.max'='10',
'fields.ts.kind'='sequence',
'fields.ts.start'='1',
'fields.ts.end'='1000000',
'fields.vc.kind'='random',
'fields.vc.min'='1',
'fields.vc.max'='100'
);
CREATE TABLE sink (
id INT,
ts BIGINT,
vc INT
) WITH (
'connector' = 'print'
);
CREATE TABLE ws (
id INT,
vc INT,
pt AS PROCTIME(), -- processing time
et AS cast(CURRENT_TIMESTAMP as timestamp(3)), -- event time
WATERMARK FOR et AS et - INTERVAL '5' SECOND -- watermark
) WITH (
'connector' = 'datagen',
'rows-per-second' = '10',
'fields.id.min' = '1',
'fields.id.max' = '3',
'fields.vc.min' = '1',
'fields.vc.max' = '100'
);
CREATE TABLE ws1 (
id INT,
vc INT,
pt AS PROCTIME(), -- processing time
et AS cast(CURRENT_TIMESTAMP as timestamp(3)), -- event time
WATERMARK FOR et AS et - INTERVAL '0.001' SECOND -- watermark
) WITH (
'connector' = 'datagen',
'rows-per-second' = '1',
'fields.id.min' = '3',
'fields.id.max' = '5',
'fields.vc.min' = '1',
'fields.vc.max' = '100'
);
flink-conf.yaml
# JobManager node address
jobmanager.rpc.address: ruoli-arch
jobmanager.bind-host: 0.0.0.0
# TaskManager node address; set to this machine's hostname
taskmanager.bind-host: 0.0.0.0
taskmanager.host: ruoli-arch
taskmanager.numberOfTaskSlots: 8
rest.address: ruoli-arch
rest.bind-address: 0.0.0.0
classloader.check-leaked-classloader: false
############### history server ##############
jobmanager.archive.fs.dir: hdfs://ruoli-arch:8020/flink/history
historyserver.web.address: ruoli-arch
historyserver.web.port: 8082
historyserver.archive.fs.dir: hdfs://ruoli-arch:8020/flink/history
historyserver.archive.fs.refresh-interval: 5000
############### state backend ##############
state.backend.type: hashmap
state.checkpoints.dir: hdfs://ruoli-arch:8020/flink/checkpoints
state.savepoints.dir: hdfs://ruoli-arch:8020/flink/savepoints
workers
ruoli-arch
masters
ruoli-arch:8081
Hbase
安装
tar -zxvf hbase-2.4.11-bin.tar.gz
mv hbase-2.4.11 /opt/module/hbase
# 2.环境变量
sudo vim /etc/profile.d/myenv.sh
# HBase
export HBASE_HOME=/opt/module/hbase
export PATH=$PATH:$HBASE_HOME/bin
source /etc/profile
# 3. Config files (contents listed below)
cd /opt/module/hbase/conf
vim hbase-env.sh
vim hbase-site.xml
vim regionservers
UI: http://ruoli-arch:16010
Common commands
# Start
start-hbase.sh
# Stop
stop-hbase.sh
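After start-hbase.sh, a quick sanity check is jps; on this single-node setup it should list HMaster and HRegionServer:
jps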
################## HBase shell commands #################
hbase shell
# Built-in help
help
# Namespaces
list_namespace
create_namespace 'bigdata'
# Create a table (table name, column family) and insert data (table name, rowkey, column cell, value)
create 'student','info'
put 'student','1001','info:sex','male'
put 'student','1001','info:age','18'
put 'student','1002','info:name','Janna'
put 'student','1002','info:sex','female'
put 'student','1002','info:age','20'
# List tables
list
# Scan table data
scan 'student'
create 'bigdata:student','info'
list_namespace_tables 'bigdata'
scan 'bigdata:student'
# A table must be disabled before it can be dropped
disable 'bigdata:student'
drop 'bigdata:student'
exit
hbase-env.sh
export HBASE_MANAGES_ZK=false
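Because HBASE_MANAGES_ZK=false, an external ZooKeeper must already be listening on ruoli-arch:2181 before start-hbase.sh; a quick check, assuming a standalone ZooKeeper install with zkServer.sh on the PATH:
zkServer.sh status # should report Mode: standalone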
hbase-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>hbase.zookeeper.quorum</name>
<value>ruoli-arch</value>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
<property>
<name>hbase.rootdir</name>
<value>hdfs://ruoli-arch:8020/hbase</value>
</property>
<property>
<name>hbase.wal.provider</name>
<value>filesystem</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.tmp.dir</name>
<value>./tmp</value>
</property>
<property>
<name>hbase.unsafe.stream.capability.enforce</name>
<value>false</value>
</property>
</configuration>
regionservers
ruoli-arch
Redis
Installation
# Alternative, building from source:
# tar -zxvf redis-6.2.6.tar.gz
# cd redis-6.2.6
# make
sudo pacman -S redis
# Config file: comment out "bind 127.0.0.1" and disable protected mode to allow remote access
sudo vim /etc/redis/redis.conf
# bind 127.0.0.1
protected-mode no
# Start the service
sudo systemctl start redis
Common commands
sudo systemctl start redis
# Command-line client
redis-cli
redis-cli -h ruoli-arch -p 6379
127.0.0.1:6379> ping
PONG
127.0.0.1:6379> shutdown
127.0.0.1:6379> quit
# quit alone does not stop the server; to stop it from the shell:
redis-cli shutdown
Redis start/stop
sudo systemctl start redis
systemctl status redis
sudo systemctl stop redis
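A minimal smoke test once the service is running:
redis-cli -h ruoli-arch ping # expect PONG
redis-cli -h ruoli-arch set k1 v1
redis-cli -h ruoli-arch get k1 # expect "v1"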
Doris
Installation
cd doris/x64_avx2
chmod 755 install.sh
chmod 755 start.sh
vim install.sh
./install.sh # install
cp -r start.sh ~/bin/dr.sh
vim ~/bin/dr.sh
# Temporary (required, otherwise the Doris BE will not start; the same line is also baked into dr.sh)
sudo sysctl -w vm.max_map_count=2000000
# Permanent: note that systemd does not read /etc/sysctl.conf at boot, so on Arch this alone does not persist
sudo vim /etc/sysctl.conf
vm.max_map_count=2000000
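On systemd the supported way to persist the setting is a drop-in under /etc/sysctl.d; a sketch (the file name 99-doris.conf is arbitrary):
echo 'vm.max_map_count=2000000' | sudo tee /etc/sysctl.d/99-doris.conf
sudo sysctl --system # apply immediately without rebooting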
dr.sh start
# UI: http://ruoli-arch:7030
# Username: root, password: root (the built-in MySQL-protocol login user)
dr.sh stop
install.sh install script
Keep the script in the same directory as the installation packages.
#!/bin/bash
#hosts=(hadoop102 hadoop103 hadoop104)
hosts=(ruoli-arch)
# Wipe any previous doris install first
for host in ${hosts[*]}; do
ssh $host " /opt/module/doris/be/bin/stop_be.sh ;\
/opt/module/doris/fe/bin/stop_fe.sh ;\
rm -rf /opt/module/doris ; \
sudo rm -rf /etc/doris
"
done
# Create the doris directory
mkdir -p /opt/module/doris
# Extract fe
tar -xvf *fe* -C /opt/module/doris
mv /opt/module/doris/*fe* /opt/module/doris/fe
# Extract be
tar -xvf *be* -C /opt/module/doris
mv /opt/module/doris/*be* /opt/module/doris/be
# Extract the dependencies
tar -xvf *dependencies* -C /opt/module/doris
mv /opt/module/doris/*dependencies* /opt/module/doris/dependencies
cp /opt/module/doris/dependencies/java-udf-jar-with-dependencies.jar /opt/module/doris/be/lib
# Configure fe
echo 'priority_networks = 192.168.10.0/24' >>/opt/module/doris/fe/conf/fe.conf
echo 'http_port = 7030' >>/opt/module/doris/fe/conf/fe.conf
# Configure be
echo 'priority_networks = 192.168.10.0/24' >>/opt/module/doris/be/conf/be.conf
echo 'webserver_port = 7040' >>/opt/module/doris/be/conf/be.conf
echo 'mem_limit = 10%' >>/opt/module/doris/be/conf/be.conf
#rsync -rvl /opt/module/doris ruoli@hadoop103:/opt/module
#rsync -rvl /opt/module/doris ruoli@hadoop104:/opt/module
dr.sh start/stop script
#!/bin/bash
source /etc/profile
sudo sysctl -w vm.max_map_count=2000000
# Countdown helper: my_sleep <seconds> <message>
function my_sleep() {
for ((sec = $1; sec >= 0; sec--)); do
echo -ne "\e[1;31m $sec $2\e[0m"
echo -ne "\r"
sleep 1
done
echo ''
}
case $1 in
"start")
if [ ! -f "/etc/doris" ]; then
echo "第一次启动 doris 集群, 时间会久一些..."
echo "在 ruoli-arch 启动 fe"
/opt/module/doris/fe/bin/start_fe.sh --daemon
# Change the root login password to root
my_sleep 15 "seconds before adding 1 BE node"
mysql -h ruoli-arch -uroot -P 9030 -e "SET PASSWORD FOR 'root' = PASSWORD('root');" 2>/dev/null
mysql -h ruoli-arch -uroot -P 9030 -proot -e "ALTER SYSTEM ADD BACKEND 'ruoli-arch:9050';" 2>/dev/null
for host in ruoli-arch; do
echo "在 $host 启动 be"
ssh $host "/opt/module/doris/be/bin/start_be.sh --daemon"
done
sudo touch /etc/doris
else
echo "不是第一次启动 doris 集群, 正常启动..."
for host in ruoli-arch; do
echo "========== 在 $host 上启动 fe ========="
ssh $host "source /etc/profile; /opt/module/doris/fe/bin/start_fe.sh --daemon"
done
for host in ruoli-arch; do
echo "========== 在 $host 上启动 be ========="
ssh $host "source /etc/profile; /opt/module/doris/be/bin/start_be.sh --daemon"
done
fi
;;
"stop")
for host in ruoli-arch; do
echo "========== 在 $host 上停止 fe ========="
ssh $host "source /etc/profile; /opt/module/doris/fe/bin/stop_fe.sh"
done
for host in ruoli-arch; do
echo "========== 在 $host 上停止 be ========="
ssh $host "source /etc/profile; /opt/module/doris/be/bin/stop_be.sh"
done
;;
*)
echo " start 启动doris集群"
echo " stop 停止stop集群"
;;
esac
Common commands
# Start/stop fe
/opt/module/doris/fe/bin/start_fe.sh --daemon
/opt/module/doris/fe/bin/stop_fe.sh
# First connection via the MySQL protocol
mysql -h ruoli-arch -P 9030 -uroot -p
SET PASSWORD FOR 'root' = PASSWORD('root');
# Add a be
ALTER SYSTEM ADD BACKEND "ruoli-arch:9050";
# Check be status
SHOW PROC '/backends'\G
# fe status
SHOW PROC '/frontends'\G
# Start/stop be
/opt/module/doris/be/bin/start_be.sh --daemon
/opt/module/doris/be/bin/stop_be.sh
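A one-liner health check built from the commands above (a live BE reports Alive: true):
mysql -h ruoli-arch -P 9030 -uroot -proot -e "SHOW PROC '/backends'\G" | grep -E 'Host|Alive'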
DolphinScheduler
Installation
# tar -zxvf apache-dolphinscheduler-2.0.5-bin.tar.gz
mysql -uroot -proot
# SQL
CREATE DATABASE dolphinscheduler DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
CREATE USER 'dolphinscheduler'@'%' IDENTIFIED BY 'dolphinscheduler';
GRANT ALL PRIVILEGES ON dolphinscheduler.* TO 'dolphinscheduler'@'%';
flush privileges;
quit;
cd apache-dolphinscheduler-2.0.5-bin/
# Edit the install config (see install_config.conf below)
vim conf/config/install_config.conf
cp ../mysql/mysql-connector-j-8.0.31.jar lib
# Initialize the database schema
script/create-dolphinscheduler.sh
hdp.sh start
zk.sh start
# Deploy and start DolphinScheduler in one step
./install.sh
# UI: http://ruoli-arch:12345/dolphinscheduler
# Default login: admin / dolphinscheduler123
bin/stop-all.sh
Common commands
hdp.sh start
zk.sh start
cd /opt/module/dolphinscheduler
## Standalone mode ##
bin/dolphinscheduler-daemon.sh start standalone-server
bin/dolphinscheduler-daemon.sh stop standalone-server
## Cluster mode ##
# Start/stop all services at once - do not confuse these with Hadoop's start/stop scripts
bin/start-all.sh
bin/stop-all.sh
# Start/stop the Master individually
bin/dolphinscheduler-daemon.sh start master-server
bin/dolphinscheduler-daemon.sh stop master-server
# ... the other daemons follow the same pattern (see below)
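The remaining daemons should follow the same script; in DolphinScheduler 2.0.x these are worker-server, api-server and alert-server, e.g.:
bin/dolphinscheduler-daemon.sh start worker-server
bin/dolphinscheduler-daemon.sh stop worker-server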
install_config.conf
ips="ruoli-arch"
# 将要部署任一 DolphinScheduler 服务的服务器主机名或 ip 列表
masters="ruoli-arch"
# master 所在主机名列表,必须是 ips 的子集
workers="ruoli-arch:default"
# worker主机名及队列,此处的 ip 必须在 ips 列表中
alertServer="ruoli-arch"
# 告警服务所在服务器主机名
apiServers="ruoli-arch"
# api服务所在服务器主机名
# pythonGatewayServers="ds1"
# 不需要的配置项,可以保留默认值,也可以用 # 注释
installPath="/opt/module/dolphinscheduler"
# DS 安装路径,如果不存在会创建
deployUser="ruoli"
# 部署用户,任务执行服务是以 sudo -u {linux-user} 切换不同 Linux 用户的方式来实现多租户运行作业,因此该用户必须有免密的 sudo 权限。
dataBasedirPath="/tmp/dolphinscheduler"
# 前文配置的所有节点的本地数据存储路径,需要确保部署用户拥有该目录的读写权限
javaHome="/opt/module/jdk"
# JAVA_HOME 路径
DATABASE_TYPE=${DATABASE_TYPE:-"mysql"}
# 数据库类型
SPRING_DATASOURCE_URL=${SPRING_DATASOURCE_URL:-"jdbc:mysql://ruoli-arch:3306/dolphinscheduler?useUnicode=true&allowPublicKeyRetrieval=true&characterEncoding=UTF-8"}
# 数据库 URL
SPRING_DATASOURCE_USERNAME=${SPRING_DATASOURCE_USERNAME:-"dolphinscheduler"}
# 数据库用户名
SPRING_DATASOURCE_PASSWORD=${SPRING_DATASOURCE_PASSWORD:-"dolphinscheduler"}
# 数据库密码
registryPluginName="zookeeper"
# 注册中心插件名称,DS 通过注册中心来确保集群配置的一致性
registryServers="ruoli-arch:2181"
# 注册中心地址,即 Zookeeper 集群的地址
registryNamespace="dolphinscheduler"
# DS 在 Zookeeper 的结点名称
resourceStorageType="HDFS"
# 资源存储类型
resourceUploadPath="/dolphinscheduler"
# 资源上传路径
defaultFS="hdfs://ruoli-arch:8020"
# 默认文件系统
resourceManagerHttpAddressPort="8088"
# yarn RM http 访问端口
yarnHaIps=
# Yarn RM 高可用 ip,若未启用 RM 高可用,则将该值置空
singleYarnIp="ruoli-arch"
# Yarn RM 主机名,若启用了 HA 或未启用 RM,保留默认值
hdfsRootUser="ruoli"
# 拥有 HDFS 根目录操作权限的用户
ds.sh standalone start/stop
vim ~/bin/ds.sh
chmod 755 ~/bin/ds.sh
#! /bin/bash
case $1 in
"start") {
for i in ruoli-arch; do
echo " --------启动 $i dolphinscheduler-------"
ssh $i "/opt/module/dolphinscheduler/bin/dolphinscheduler-daemon.sh start standalone-server"
done
} ;;
"stop") {
for i in ruoli-arch; do
echo " --------停止 $i dolphinscheduler-------"
ssh $i "/opt/module/dolphinscheduler/bin/dolphinscheduler-daemon.sh stop standalone-server"
done
} ;;
esac
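Usage mirrors the other helper scripts in these notes:
ds.sh start
ds.sh stop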