[TOC]
参考
Eclipse workspace 工程java文件编码问题
解决的问题:Eclipse workspace中的项目有的是GBK编码,如果把它们导入到默认编码为UTF-8的workspace中,中文会出现乱码。
下面用python3递归遍历当前目录及其子目录,把其中的*.java文件由gbk转换为utf-8。注意只能运行一次:运行之后当前目录及子目录下的文件已经是utf-8编码,再次运行会把utf-8内容当作gbk来解码,导致乱码或报错。
import codecs
def ReadFile(filePath, encoding="gbk"):
    """Return the whole content of ``filePath`` decoded with ``encoding``.

    Args:
        filePath: Path of the file to read.
        encoding: Codec used to decode the file; defaults to ``"gbk"``.

    Returns:
        The decoded text, with line endings exactly as stored on disk.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the bytes are not valid in ``encoding``.
    """
    # Built-in open() replaces codecs.open(), which is deprecated since
    # Python 3.14.  newline="" disables newline translation so "\r\n" is
    # returned untouched, as it was by the binary-mode codecs reader.
    with open(filePath, "r", encoding=encoding, newline="") as f:
        return f.read()
def WriteFile(filePath, u, encoding="utf-8"):
    """Write the text ``u`` to ``filePath`` using ``encoding``.

    An existing file is overwritten.

    Args:
        filePath: Path of the file to create or overwrite.
        u: Text to write; must be a ``str``.
        encoding: Codec used to encode the text; defaults to ``"utf-8"``.

    Raises:
        TypeError: If ``u`` is not a ``str``.
        OSError: If the file cannot be opened for writing.
        UnicodeEncodeError: If ``u`` cannot be represented in ``encoding``.
    """
    # Validate before opening: mode "w" truncates the file immediately, so a
    # bad argument would otherwise wipe the target before the write fails.
    if not isinstance(u, str):
        raise TypeError("WriteFile() expects str content, not "
                        + type(u).__name__)
    # Built-in open() replaces codecs.open(), which is deprecated since
    # Python 3.14.  newline="" keeps the line endings of ``u`` unchanged
    # instead of translating "\n" to the platform default.
    with open(filePath, "w", encoding=encoding, newline="") as f:
        f.write(u)
def UTF8_2_GBK(src, dst):
    """Re-encode the file ``src`` from GBK to UTF-8 and store it at ``dst``.

    Despite the name, the direction is GBK -> UTF-8: the source is read with
    the ``gbk`` codec and the text is written with ``utf-8``.  The source is
    read before the destination is written, so the same path may be passed
    for both to convert a file in place.
    """
    WriteFile(dst, ReadFile(src, encoding="gbk"), encoding="utf-8")
import os
import os.path
def ReadDirectoryFile(rootdir):
    """Walk ``rootdir`` recursively and convert its ``*.java`` files to UTF-8.

    Every sub-directory and file met during the walk is reported on stdout.
    Files whose name ends with ``.java`` are converted in place from GBK to
    UTF-8 through ``UTF8_2_GBK``.
    """
    for folder, subfolders, files in os.walk(rootdir):
        # Case 1: report the sub-directories of the current folder.
        for subfolder in subfolders:
            print("parent folder is:" + folder)
            print("dirname is:" + subfolder)
        # Case 2: report every file and convert the Java sources.
        for name in files:
            full_path = os.path.join(folder, name)
            print("parent folder is:" + folder)
            print("filename with full path:" + full_path)
            if not name.endswith(".java"):
                continue
            UTF8_2_GBK(full_path, full_path)
            print("Java文件")
# Entry point: when run as a script, convert the *.java files found below
# the current working directory.
if __name__ == "__main__":
    ReadDirectoryFile(".")
eclipse 工程jsp文件编码以及头
<%@page contentType="text/html;charset=gbk"%>
批量转换为<%@page contentType="text/html;charset=UTF-8"%>
大部分和上面编码类似,只是多了一个字符串的替换,重写了ReadFile方法,按行读取,然后替换字符"charset=gbk"-->"charset=UTF-8"
import codecs
def ReadFile(filePath, encoding="gbk"):
    """Read a JSP file and point its charset declaration at UTF-8.

    The file is decoded with ``encoding`` and every occurrence of
    ``charset=gbk``, ``charset=GBK``, ``charset=gb2312`` or
    ``charset=GB2312`` is replaced by ``charset=UTF-8``.

    Args:
        filePath: Path of the file to read.
        encoding: Codec used to decode the file; defaults to ``"gbk"``.

    Returns:
        The rewritten text with its original line endings, or ``None`` when
        the file cannot be opened or cannot be decoded with ``encoding``.
    """
    try:
        # ``with`` closes the handle even when decoding fails part-way.
        # newline="" keeps "\r\n" untouched, as the codecs reader did.
        with open(filePath, "r", encoding=encoding, newline="") as f:
            text = f.read()
    except (OSError, UnicodeError):
        # Best effort: an unreadable file is reported to the caller as None.
        return None
    legacy_declarations = (
        "charset=gbk",
        "charset=GBK",
        "charset=gb2312",
        "charset=GB2312",
    )
    # The needles contain no line break, so replacing on the whole text gives
    # the same result as replacing line by line.
    for legacy in legacy_declarations:
        text = text.replace(legacy, "charset=UTF-8")
    return text
def WriteFile(filePath, u, encoding="utf-8"):
    """Write the text ``u`` to ``filePath`` using ``encoding``.

    An existing file is overwritten.

    Args:
        filePath: Path of the file to create or overwrite.
        u: Text to write; must be a ``str``.
        encoding: Codec used to encode the text; defaults to ``"utf-8"``.

    Raises:
        TypeError: If ``u`` is not a ``str`` (for example ``None``).
        OSError: If the file cannot be opened for writing.
        UnicodeEncodeError: If ``u`` cannot be represented in ``encoding``.
    """
    # Validate before opening: mode "w" truncates the file immediately, so
    # passing None would otherwise empty the target before the write fails.
    if not isinstance(u, str):
        raise TypeError("WriteFile() expects str content, not "
                        + type(u).__name__)
    # Built-in open() replaces codecs.open(), which is deprecated since
    # Python 3.14.  newline="" keeps the line endings of ``u`` unchanged
    # instead of translating "\n" to the platform default.
    with open(filePath, "w", encoding=encoding, newline="") as f:
        f.write(u)
def UTF8_2_GBK(src, dst):
    """Re-encode the JSP file ``src`` from GBK to UTF-8 and store it at ``dst``.

    Despite the name, the direction is GBK -> UTF-8.  The text returned by
    ``ReadFile`` is written with the ``utf-8`` codec.  ``src`` and ``dst``
    may be the same path to convert a file in place.

    When ``ReadFile`` returns ``None`` the file is skipped and ``dst`` is
    left untouched.
    """
    content = ReadFile(src, encoding="gbk")
    if content is None:
        # Writing None would truncate ``dst`` and then fail, destroying the
        # file during an in-place conversion; skip it instead.
        print("skipped, cannot be read as gbk:", src)
        return
    WriteFile(dst, content, encoding="utf-8")
import os
import os.path
def ReadDirectoryFile(rootdir):
    """Walk ``rootdir`` recursively and convert its ``*.jsp`` files to UTF-8.

    Every sub-directory and file met during the walk is reported on stdout.
    Files whose name ends with ``.jsp`` are converted in place from GBK to
    UTF-8 through ``UTF8_2_GBK``.
    """
    for folder, subfolders, files in os.walk(rootdir):
        # Case 1: report the sub-directories of the current folder.
        for subfolder in subfolders:
            print("parent folder is:" + folder)
            print("dirname is:" + subfolder)
        # Case 2: report every file and convert the JSP pages.
        for name in files:
            full_path = os.path.join(folder, name)
            print("parent folder is:" + folder)
            print("filename with full path:" + full_path)
            if not name.endswith(".jsp"):
                continue
            UTF8_2_GBK(full_path, full_path)
            print("jsp文件")
# Entry point: when run as a script, convert the *.jsp files found below
# the current working directory.
if __name__ == "__main__":
    ReadDirectoryFile(".")
为文件添加行号
#####################
# 为文件添加行号 #
#####################
import codecs
old_filename = "wordcount.py"
new_filename = "newwordcount.py"


def add_line_numbers(src, dst, encoding="utf-8"):
    """Copy ``src`` to ``dst``, prefixing each line with its 1-based number.

    Each output line is ``"<number> <original line>"``; line endings are
    copied unchanged.

    Args:
        src: Path of the file to read.
        dst: Path of the numbered copy to create or overwrite.
        encoding: Codec used for both files; defaults to ``"utf-8"``.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeError: If ``src`` is not valid in ``encoding``.
    """
    # Both handles live in one ``with`` so the output is closed even when
    # reading fails.  The source is opened first, so a missing input no
    # longer leaves an empty output file behind.  newline="" switches off
    # newline translation on both sides.
    with open(src, "r", encoding=encoding, newline="") as fin, \
            open(dst, "w", encoding=encoding, newline="") as fout:
        for number, line in enumerate(fin, start=1):
            fout.write(str(number) + ' ' + line)


# Run the conversion only when executed as a script, like the other
# snippets in this note.
if __name__ == "__main__":
    add_line_numbers(old_filename, new_filename)