起初想用 unoconv 将各类文档转成 pdf,但 txt 文件的编码处理确实让人头疼。下面这个方法基本可以将 txt 的 4 种编码格式统一起来。注意:Linux 下需设置环境变量:
LANG=C.UTF-8
package until import ( "unicode/utf16" "bytes" "golang.org/x/text/encoding/simplifiedchinese" "golang.org/x/text/transform" "io/ioutil" ) func Utf16toString(b []uint8) (string) { if len(b)&1 != 0 { return string(b) } var bom int if len(b) >= 2 { switch n := int(b[0])<<8 | int(b[1]); n { case 0xfffe: bom = 1 fallthrough case 0xfeff: b = b[2:] w := make([]uint16, len(b)/2) for i := range w { w[i] = uint16(b[2*i+bom&1])<<8 | uint16(b[2*i+(bom+1)&1]) } return string(utf16.Decode(w)) default: gbk, _ := GbkToUtf8(b) return string(gbk) } } else { return string(b) } } func GbkToUtf8(s []byte) ([]byte, error) { reader := transform.NewReader(bytes.NewReader(s), simplifiedchinese.GBK.NewDecoder()) d, e := ioutil.ReadAll(reader) if e != nil { return nil, e } return d, nil }
func Utf16toString(b []uint8) (string) { var bom int if len(b) >= 2 { switch n := int(b[0])<<8 | int(b[1]); n { case 0xfffe: bom = 1 fallthrough case 0xfeff: b = b[2:] w := make([]uint16, len(b)/2) for i := range w { w[i] = uint16(b[2*i+bom&1])<<8 | uint16(b[2*i+(bom+1)&1]) } return string(utf16.Decode(w)) case 0x564d: gbk, _ := GbkToUtf8(b) return string(gbk) default: return string(b) } } else { return string(b) } }