Unicode转义(\uXXXX)的编码和解码 go-nascii 类似于 native2ascii

go-nascii 原本是一位日本开发者用 Go 语言编写的程序,但原版不支持管道输入,因此我为它增加了从管道(标准输入)读取的功能。

可执行文件下载地址:https://pan.baidu.com/s/1yBS5lmiMFCd86Wvtei-y8Q 提取码: 4znn

native2ascii 也能实现同样的转换,但经过测试发现,当输入中 ASCII 字符与 \uXXXX 转义混合出现时,它的处理结果并不完美。

package main

import (
    "fmt"
    "os"
    "bufio"
    "regexp"
    "strconv"
    "strings"
    "unicode"
    "unicode/utf8"

    "github.com/jessevdk/go-flags"
)

// option holds the command-line flags; main hands a pointer to it to
// flags.ParseArgs, which fills it in from the struct tags.
type option struct {
    // IsReverse is populated from -r/--reverse. main treats the flag as set
    // whenever this slice is non-empty, and then decodes \uXXXX escapes back
    // into text instead of producing them.
    IsReverse []bool `short:"r" long:"reverse" description:"Unicode code points to string. E.g. \\u3042\\u3044\\u3046\\u3048\\u304A -> あいうえお"`
}

// re matches a single escape sequence: a backslash, then 'u' or 'U', then
// exactly four hexadecimal digits in either case.
var re = regexp.MustCompile(`\\[uU][0-9a-fA-F]{4}`)

// main parses the command line, selects the input text and prints its
// converted form on standard output.
//
// Input selection: the last positional argument is used when it is present
// and non-empty; otherwise every line of standard input is converted in turn,
// so the tool works as a filter at the end of a pipe. With -r/--reverse the
// \uXXXX escapes are decoded; without it, non-ASCII and control characters
// are encoded as \uXXXX.
//
// Exit status is 0 on success or when help was requested, and 1 for an
// invalid command line or a failure while reading standard input.
func main() {
	var opt option
	args, err := flags.ParseArgs(&opt, os.Args)
	if err != nil {
		// With its default options go-flags has already printed the help
		// text or the error message. Only a help request counts as success.
		if flagErr, ok := err.(*flags.Error); ok && flagErr.Type == flags.ErrHelp {
			os.Exit(0)
		}
		os.Exit(1)
	}

	convert := utf8ToAscii
	if len(opt.IsReverse) > 0 {
		convert = asciiToUtf8
	}

	// args[0] is the program name (os.Args was passed unmodified); of the
	// remaining positional arguments the last one wins.
	var instr string
	if len(args) > 1 {
		instr = args[len(args)-1]
	}
	if instr != "" {
		convert(instr)
		return
	}

	// No usable argument: convert standard input line by line.
	input := bufio.NewScanner(os.Stdin)
	for input.Scan() {
		convert(input.Text())
	}
	if err := input.Err(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// utf8ToAscii prints s with every control character and every non-ASCII
// character replaced by a \uXXXX escape (upper-case hex), followed by a
// newline. Printable ASCII is copied through unchanged.
//
// Characters above U+FFFF do not fit in four hex digits, so they are written
// as a UTF-16 surrogate pair (two consecutive \uXXXX escapes), which is the
// form native2ascii-style decoders expect.
//
// If s is not valid UTF-8, a diagnostic is printed instead and nothing is
// converted.
func utf8ToAscii(s string) {
	if !utf8.ValidString(s) {
		fmt.Printf("'%s' includes non-UTF8 value(s).\n", s)
		return
	}

	const (
		maxBMP        = 0xFFFF  // last code point representable in one escape
		suppBase      = 0x10000 // first supplementary-plane code point
		highSurrogate = 0xD800
		lowSurrogate  = 0xDC00
	)

	var out strings.Builder
	out.Grow(len(s))
	for _, r := range s {
		switch {
		case r <= unicode.MaxASCII && !unicode.IsControl(r):
			out.WriteRune(r)
		case r > maxBMP:
			// Split the 20-bit offset into two 10-bit halves.
			offset := r - suppBase
			fmt.Fprintf(&out, "\\u%04X\\u%04X", highSurrogate+(offset>>10), lowSurrogate+(offset&0x3FF))
		default:
			fmt.Fprintf(&out, "\\u%04X", r)
		}
	}
	fmt.Println(out.String())
}

// asciiToUtf8 decodes every \uXXXX (or \UXXXX) escape in s and prints the
// result followed by a newline. Text outside escapes is copied unchanged.
//
// The input is scanned once, left to right, so characters produced by
// decoding one escape are never re-read as the start of another: the input
// `\u005cu0041` yields the literal text `\u0041`, not "A".
//
// A high-surrogate escape immediately followed by a low-surrogate escape is
// combined into the single supplementary-plane character the pair encodes.
// An unpaired surrogate cannot be represented in UTF-8 and comes out as
// U+FFFD.
func asciiToUtf8(s string) {
	const (
		highStart = 0xD800 // first high (leading) surrogate
		lowStart  = 0xDC00 // first low (trailing) surrogate
		lowEnd    = 0xDFFF // last low surrogate
		suppBase  = 0x10000
	)

	// Locate all escapes up front and parse their numeric values.
	spans := re.FindAllStringIndex(s, -1)
	values := make([]rune, len(spans))
	for i, span := range spans {
		digits := s[span[0]+2 : span[1]] // skip the leading `\u`
		n, err := strconv.ParseUint(digits, 16, 16)
		if err != nil {
			fmt.Printf("'%s' can't be converted to number.\n", digits)
			return
		}
		values[i] = rune(n)
	}

	var out strings.Builder
	out.Grow(len(s))
	pos := 0 // index in s of the first byte not yet written
	for i := 0; i < len(spans); i++ {
		out.WriteString(s[pos:spans[i][0]])
		pos = spans[i][1]

		r := values[i]
		isHigh := r >= highStart && r < lowStart
		if isHigh && i+1 < len(spans) && spans[i+1][0] == pos {
			if lo := values[i+1]; lo >= lowStart && lo <= lowEnd {
				r = suppBase + ((r - highStart) << 10) + (lo - lowStart)
				i++ // the low surrogate has been consumed as well
				pos = spans[i][1]
			}
		}
		out.WriteRune(r)
	}
	out.WriteString(s[pos:])

	fmt.Println(out.String())
}

 

posted @ 2020-08-26 16:12  1CM  阅读(382)  评论(0)  编辑  收藏  举报