import os
import ezdxf
import pandas as pd
from gooey import Gooey, GooeyParser
import fitz # PyMuPDF
# Cache of block definitions keyed by block name, so that repeated INSERTs of
# the same block do not look the definition up in the block table again.
block_cache = {}


def extract_sizes_from_block(block, all_sizes, _open_blocks=None):
    """Recursively collect dimension measurements from a block definition.

    Every entity of ``block`` is visited. The measurement of each DIMENSION
    entity is appended to ``all_sizes``; each INSERT entity is followed into
    the block definition it references, which is resolved through
    ``block.doc.blocks`` and memoised in ``block_cache``.

    Args:
        block: Iterable of DXF entities that also exposes ``doc.blocks`` for
            resolving nested block references. ``None`` is ignored.
        all_sizes: List receiving the measurements; it is mutated in place.
        _open_blocks: Internal bookkeeping. Names of the block definitions
            that are currently being walked; callers should not pass it.

    Returns:
        None. Results are delivered through ``all_sizes``.
    """
    if block is None:
        return
    if _open_blocks is None:
        _open_blocks = set()

    for entity in block:
        entity_type = entity.dxftype()
        if entity_type == 'DIMENSION':
            try:
                # get_measurement() is the single accessor for every
                # dimension type, so no per-type dispatch is needed here.
                # NOTE(review): per the ezdxf docs angular dimensions come
                # back in degrees and ordinate dimensions as a vector --
                # confirm against the installed ezdxf version.
                measurement = entity.get_measurement()
            except (AttributeError, TypeError, ValueError) as e:
                # Skip the unreadable dimension, but say so: silently
                # dropping entities makes an empty result undiagnosable.
                print(f"跳过无法读取的尺寸标注: {e}")
                continue
            all_sizes.append(measurement)
        elif entity_type == 'INSERT':
            block_name = entity.dxf.name
            if block_name in _open_blocks:
                # This definition is already being walked further up the
                # call chain; following it again would never terminate.
                continue
            if block_name not in block_cache:
                block_cache[block_name] = block.doc.blocks.get(block_name)
            nested_block = block_cache[block_name]
            if nested_block is None:
                # The INSERT references a block that has no definition.
                continue
            _open_blocks.add(block_name)
            try:
                extract_sizes_from_block(nested_block, all_sizes, _open_blocks)
            finally:
                _open_blocks.discard(block_name)
def extract_sizes_from_pdf(file_path):
    """Collect every number that appears in the text of a PDF file.

    Each page's text is scanned for unsigned integers and decimals; every
    match is converted to ``float``. No attempt is made to tell dimensions
    apart from other numbers (page numbers, dates, ...).

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        list[float]: The numbers in page order. If an error occurs it is
        printed and the numbers gathered up to that point are returned
        (an empty list when the file could not be opened).
    """
    # Function-scope import: the module does not import ``re`` at file level.
    import re

    # Compiled once instead of being looked up again for every page.
    number_pattern = re.compile(r'\d+\.?\d*')

    all_sizes = []
    doc = None
    try:
        doc = fitz.open(file_path)
        for page in doc:
            text = page.get_text()
            all_sizes.extend([float(size) for size in number_pattern.findall(text)])
    except Exception as e:
        print(f"处理 PDF 文件时出错: {e}")
    finally:
        # Release the document even when a page fails to parse.
        if doc is not None:
            doc.close()
    return all_sizes
def _read_cad_document(file_path, file_ext):
    """Load a DXF or DWG file as an ezdxf document."""
    if file_ext == '.dwg':
        # ezdxf.readfile() only parses DXF. DWG files go through the odafc
        # addon, which needs the external ODA File Converter to be installed;
        # if it is missing the addon raises and the caller reports the error.
        from ezdxf.addons import odafc
        return odafc.readfile(file_path)
    return ezdxf.readfile(file_path)


def _collect_layout_sizes(layout, doc, all_sizes):
    """Append the dimension measurements found in one layout to ``all_sizes``."""
    for entity in layout:
        entity_type = entity.dxftype()
        if entity_type == 'DIMENSION':
            try:
                # One accessor for all dimension types; no per-type dispatch.
                measurement = entity.get_measurement()
            except (AttributeError, TypeError, ValueError) as e:
                # Report instead of silently dropping the dimension.
                print(f"跳过无法读取的尺寸标注: {e}")
                continue
            all_sizes.append(measurement)
        elif entity_type == 'INSERT':
            block_name = entity.dxf.name
            if block_name not in block_cache:
                block_cache[block_name] = doc.blocks.get(block_name)
            block = block_cache[block_name]
            # An INSERT may reference a block without a definition.
            if block is not None:
                extract_sizes_from_block(block, all_sizes)


@Gooey(program_name="DWG/DXF/PDF 尺寸汇总工具")
def main():
    """Run the GUI tool: read one DWG/DXF/PDF file and write its sizes to CSV.

    The input file and the output CSV path are taken from the Gooey form.
    For DWG/DXF input the measurement of every DIMENSION entity in every
    layout is collected, including dimensions inside inserted blocks. For PDF
    input every number in the page text is collected. The result is written
    as a single-column CSV. Problems are printed rather than raised; a
    missing input file or an unsupported extension ends the run without
    writing a CSV.
    """
    parser = GooeyParser(description="读取 DWG、DXF 或 PDF 文档,将尺寸汇总到表格")
    parser.add_argument('file_path', widget='FileChooser', help='选择 DWG、DXF 或 PDF 文件')
    parser.add_argument('output_path', widget='FileSaver', help='选择输出的 CSV 文件路径', default='output.csv')
    args = parser.parse_args()
    file_path = args.file_path
    output_path = args.output_path

    if not os.path.exists(file_path):
        print(f"文件 {file_path} 不存在!")
        return

    all_sizes = []
    file_ext = os.path.splitext(file_path)[1].lower()
    if file_ext in ['.dwg', '.dxf']:
        try:
            doc = _read_cad_document(file_path, file_ext)
            # Cached block definitions belong to one document only.
            block_cache.clear()
            # NOTE(review): per the ezdxf docs ``doc.layouts`` yields the
            # model space together with the paper space layouts, so a
            # separate pass over ``doc.modelspace()`` would record every
            # model space dimension twice.
            for layout in doc.layouts:
                _collect_layout_sizes(layout, doc, all_sizes)
        except Exception as e:
            # Top-level boundary of the GUI tool: report and keep whatever
            # was collected before the failure.
            print(f"处理文件时出错: {e}")
    elif file_ext == '.pdf':
        all_sizes = extract_sizes_from_pdf(file_path)
    else:
        # Do not write an empty CSV and claim success for unknown input.
        print(f"不支持的文件类型: {file_ext}")
        return

    df = pd.DataFrame({'尺寸': all_sizes})
    df.to_csv(output_path, index=False)
    print(f"尺寸已汇总到 {output_path}")
# Launch the GUI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
从 PDF 或 DXF 中提取尺寸并生成表格:为什么提取不到尺寸?请技术大神帮忙指导一下。
上一篇
SOP快速制作
下一篇
提取尺寸为0是什么原因?
相关文章
发表评论:
◎欢迎参与讨论,请在这里发表您的看法、交流您的观点。