from sys import argv
from os import listdir
from os.path import join, isfile, isdir
from docx import Document
from openpyxl import load_workbook
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE
def checkdocx(dstStr, fn):
    """Return True if ``dstStr`` occurs in the .docx file ``fn``.

    The text of each paragraph in ``document.paragraphs`` is searched first,
    followed by the text of every cell of every table in ``document.tables``.

    Args:
        dstStr: Text to look for.
        fn: Path of the .docx file to open.

    Returns:
        True on the first match, False when nothing matches.
    """
    doc = Document(fn)
    # Paragraph text first.
    if any(dstStr in para.text for para in doc.paragraphs):
        return True
    # Then the text of every table cell; generators keep the search lazy,
    # so it still stops at the first hit.
    return any(
        dstStr in cell.text
        for tbl in doc.tables
        for row in tbl.rows
        for cell in row.cells
    )
def checkxlsx(dstStr, fn):
    """Return True if ``dstStr`` occurs in a text cell of the .xlsx file ``fn``.

    Every cell of every worksheet in the workbook is inspected. Only cells
    whose value is a string are searched; any other value (for example an
    empty cell, whose value is not a string) is skipped.

    Args:
        dstStr: Text to look for.
        fn: Path of the .xlsx file to open.

    Returns:
        True on the first matching cell, False when nothing matches.
    """
    # Open the workbook.
    wb = load_workbook(fn)
    # Walk every cell of every worksheet.
    for ws in wb.worksheets:
        for row in ws.rows:
            for cell in row:
                value = cell.value
                # Check the type explicitly rather than wrapping the ``in``
                # test in a bare ``except``: that also swallowed
                # KeyboardInterrupt and hid any genuine error.
                if isinstance(value, str) and dstStr in value:
                    return True
    return False
def checkpptx(dstStr, fn):
    """Return True if ``dstStr`` occurs in the .pptx file ``fn``.

    On every slide, the cells of table shapes and the text of text-box and
    placeholder shapes are searched. Shapes of any other type are ignored.

    Args:
        dstStr: Text to look for.
        fn: Path of the .pptx file to open.

    Returns:
        True on the first match, False when nothing matches.
    """
    # Open the presentation.
    presentation = Presentation(fn)
    text_shape_types = (MSO_SHAPE_TYPE.TEXT_BOX, MSO_SHAPE_TYPE.PLACEHOLDER)
    # Walk every shape of every slide.
    for slide in presentation.slides:
        for shape in slide.shapes:
            shape_type = shape.shape_type
            # Text of table cells.
            if shape_type == MSO_SHAPE_TYPE.TABLE:
                for row in shape.table.rows:
                    for cell in row.cells:
                        if dstStr in cell.text_frame.text:
                            return True
            # Text boxes and placeholders.
            elif shape_type in text_shape_types:
                # A placeholder may have no text frame, in which case it has
                # no ``text`` to read. Ask the shape explicitly instead of
                # using a bare ``except``, which also swallowed
                # KeyboardInterrupt and hid genuine errors.
                if shape.has_text_frame and dstStr in shape.text:
                    return True
    return False
def main(dstStr, flag):
    """Print the path of every Office document that contains ``dstStr``.

    Starting from the current directory, files ending in .docx, .xlsx and
    .pptx are checked with checkdocx, checkxlsx and checkpptx respectively,
    and the path of each matching file is printed to stdout.

    A directory that cannot be listed, or a document whose checker raises,
    is reported on stderr and skipped so that one bad entry does not abort
    the whole search.

    Args:
        dstStr: Text to look for.
        flag: When true, subfolders are traversed as well (breadth-first);
            otherwise only the current directory is scanned.
    """
    # Imported locally so the function only relies on names the file
    # already imports at the top.
    import sys
    from collections import deque

    # Breadth-first traversal; a single dot denotes the current directory.
    # A deque gives O(1) removal from the front, unlike list.pop(0).
    pending = deque(['.'])
    while pending:
        # Take the oldest directory that has not been visited yet.
        currentDir = pending.popleft()
        try:
            names = listdir(currentDir)
        except OSError as exc:
            print('skipped {}: {}'.format(currentDir, exc), file=sys.stderr)
            continue
        for fn in names:
            path = join(currentDir, fn)
            if isfile(path):
                # Pick the checker that matches the file extension.
                if path.endswith('.docx'):
                    check = checkdocx
                elif path.endswith('.xlsx'):
                    check = checkxlsx
                elif path.endswith('.pptx'):
                    check = checkpptx
                else:
                    continue
                try:
                    found = check(dstStr, path)
                except Exception as exc:
                    # Traversal boundary: report the file that could not be
                    # read (e.g. a corrupt document) and keep searching.
                    print('skipped {}: {}: {}'.format(
                        path, type(exc).__name__, exc), file=sys.stderr)
                    continue
                if found:
                    print(path)
            # Queue subfolders only when a recursive search was requested.
            elif flag and isdir(path):
                pending.append(path)
# argv[0] is the name of the script.
# Usage: SCRIPT [/s] TEXT
# A leading '/s' means files in all subfolders are checked as well;
# the next argument is the text to search for.
if __name__ == '__main__':
    args = argv[1:]
    flag = bool(args) and args[0] == '/s'
    if flag:
        args = args[1:]
    if not args:
        # Previously a missing argument surfaced as an IndexError traceback.
        raise SystemExit('usage: {} [/s] TEXT'.format(argv[0]))
    dstStr = args[0]
    main(dstStr, flag)