分类: Linux
wget下载中文文件名乱码解决

引起乱码的原因主要是编码问题:有些中文文件名的编码是GBK或者GB2312,在Linux下用wget下载下来以后文件名就乱码了。
用wget下载的时候带上参数:

--restrict-file-names=ascii 或 --restrict-file-names=nocontrol

下载完成后,用下面的Python脚本转换一下文件名,在Windows下就能正常显示中文了:

import os, urllib, sys, getopt
 
class Renamer:
    """Re-encode the names of all files and directories below a path.

    Every entry name is handled as a byte string: it is optionally
    URL-unquoted, decoded with ``input_encoding`` and encoded again with
    ``output_encoding``.  The implementation avoids constructs that exist
    only on Python 2 or only on Python 3.
    """

    # Class-level defaults; __init__ overrides each of them per instance.
    input_encoding = ""
    output_encoding = ""
    path = ""
    is_url = False

    def __init__(self, input, output, path, is_url):
        """Store the conversion settings.

        Args:
            input: codec name the current entry names are encoded in.
            output: codec name the entry names are converted to.
            path: directory whose contents are renamed.
            is_url: when true, %XX escapes are unquoted before decoding.
        """
        self.input_encoding = input
        self.output_encoding = output
        self.path = path
        self.is_url = is_url

    def start(self):
        """Rename everything below ``self.path``."""
        self.rename_dir(self.path)

    @staticmethod
    def _to_bytes(value):
        """Return *value* as a byte string suitable for os-level calls."""
        if isinstance(value, bytes):
            return value
        if hasattr(os, "fsencode"):  # Python 3
            return os.fsencode(value)
        # Python 2 ``unicode`` object.
        return value.encode(sys.getfilesystemencoding() or "utf-8")

    @staticmethod
    def _unquote(raw):
        """Replace %XX escapes in the byte string *raw* by the bytes they denote."""
        # Imported here because the location of the helper differs between
        # Python 2 and Python 3.
        try:
            from urllib.parse import unquote_to_bytes  # Python 3
        except ImportError:
            from urllib import unquote as unquote_to_bytes  # Python 2
        return unquote_to_bytes(raw)

    @staticmethod
    def _is_plain_name(name):
        """Tell whether the byte string *name* is a single path component."""
        if name in (b"", b".", b".."):
            return False
        if b"\0" in name:
            return False
        for sep in (os.sep, os.altsep):
            if sep and sep.encode("ascii") in name:
                return False
        return True

    @staticmethod
    def _report(name, reason):
        """Write a one-line note about a skipped entry to standard error."""
        sys.stderr.write("skipped %r: %s\n" % (name, reason))

    def rename(self, root, path):
        """Re-encode the name of one entry inside directory *root*.

        Args:
            root: directory containing the entry.
            path: current name of the entry (a single path component).

        Returns:
            True when the entry was renamed, False when the name needed no
            change or the entry was skipped (a note is written to stderr
            for every skip caused by an error).
        """
        old_name = self._to_bytes(path)
        try:
            raw = self._unquote(old_name) if self.is_url else old_name
            new_name = raw.decode(self.input_encoding).encode(self.output_encoding)
        except (UnicodeError, LookupError) as err:
            # UnicodeError: the name does not fit one of the encodings;
            # LookupError: one of the codec names is unknown.
            self._report(old_name, err)
            return False

        if new_name == old_name:
            return False
        if not self._is_plain_name(new_name):
            # e.g. an unquoted "%2F" would otherwise move the entry into
            # another directory.
            self._report(old_name, "converted name is not a valid file name")
            return False

        parent = self._to_bytes(root)
        target = os.path.join(parent, new_name)
        if os.path.lexists(target):
            # os.rename may silently replace an existing file.
            self._report(old_name, "target name already exists")
            return False
        try:
            os.rename(os.path.join(parent, old_name), target)
        except OSError as err:
            self._report(old_name, err)
            return False
        return True

    def rename_dir(self, path):
        """Rename every file and directory found below *path*.

        The directory *path* itself keeps its name.
        """
        # os.walk already recurses.  Walking bottom-up means the contents of
        # a directory are processed before the directory itself is renamed,
        # so no path handed to rename() has gone stale.
        for root, dirs, files in os.walk(self._to_bytes(path), topdown=False):
            for name in files + dirs:
                self.rename(root, name)


def usage():
    """Print the command-line help text to standard output."""
    # A parenthesised single argument is printed identically by the
    # Python 2 print statement and the Python 3 print function.
    print('''This program can change encode of files or directories.
    Usage:   rename.exe [OPTION]...
    Options:
        -h, --help                  this document.
        -i, --input-encoding=ENC    set original encoding, default is UTF-8.
        -o, --output-encoding=ENC   set output encoding, default is GBK.
        -p, --path=PATH             choose the path which to process.
        -u, --is-url                whether as a URL
    ''')
 
 
def main(argv):
    """Parse the command-line options and run the batch rename.

    Args:
        argv: the option list without the program name, e.g. sys.argv[1:].

    Exits with status 2 (after printing the usage text) when an option is
    not recognised or no path was given, and with status 0 after -h/--help.
    """
    input_encoding = "utf-8"
    output_encoding = "gbk"
    path = ""
    # Off unless requested: with a default of True the -u/--is-url switch
    # could never change anything.
    is_url = False

    try:
        opts, args = getopt.getopt(
            argv,
            "hi:o:p:u",
            ["help", "input-encoding=", "output-encoding=", "path=", "is-url"],
        )
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-i", "--input-encoding"):
            input_encoding = arg
        elif opt in ("-o", "--output-encoding"):
            output_encoding = arg
        elif opt in ("-p", "--path"):
            path = arg
        elif opt in ("-u", "--is-url"):
            is_url = True

    if not path:
        # Without a path there is nothing to walk; say so instead of
        # finishing silently.
        usage()
        sys.exit(2)

    rn = Renamer(input_encoding, output_encoding, path, is_url)
    rn.start()
 
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    cli_args = sys.argv[1:]  # everything after the program name
    main(cli_args)

运行脚本:

xxx.py -i utf-8 -o gbk -p PATH -u

运行以后就可以批量更改PATH目录下面的文件名了;
或者用RenamePro.zip(Rename Pro更名工具)批量操作一下。


相关博文:

发表新评论