天行学习助手

从 PDF 或 DXF 中提取尺寸并生成表格:为什么提取不到尺寸?请技术大神帮忙指导一下。

import os
import ezdxf
import pandas as pd
from gooey import Gooey, GooeyParser
import fitz  # PyMuPDF

# Cache of block definitions, keyed by block name. Each value is whatever
# doc.blocks.get(name) returned, so a given name is looked up at most once.
# NOTE(review): entries are keyed by name only, not by document.
block_cache = {}


def _dimension_measurement(entity):
    """Return the measured value of one DIMENSION entity, or None on failure.

    The value comes from the entity's ``get_measurement()`` method when it
    has one; otherwise the stored ``actual_measurement`` DXF attribute is
    used. Any failure is printed and reported as None, so a single
    malformed entity cannot abort the whole extraction.
    """
    try:
        getter = getattr(entity, 'get_measurement', None)
        if getter is not None:
            return getter()
        # Fallback for entities without the accessor: the value stored in
        # the file, which is optional and may be absent (None).
        return entity.dxf.get('actual_measurement')
    except Exception as exc:  # per-entity boundary: report, then skip
        print(f"跳过无法读取的尺寸标注: {exc}")
        return None


def extract_sizes_from_block(block, all_sizes, _active_blocks=None):
    """Recursively collect dimension measurements from a block or layout.

    The previous implementation compared ``entity.dxf.dimtype`` against
    ``ezdxf.dimstyles.DIMTYPE`` members. That lookup raised AttributeError,
    which was silently swallowed, so no DIMENSION was ever recorded. The
    measurement is now read the same way for every dimension type.

    Args:
        block: Iterable of DXF entities (a block definition or a layout).
            Must expose ``.doc`` when it contains INSERT entities. ``None``
            is accepted and ignored, since a block lookup can return None.
        all_sizes: List that receives the measurements; mutated in place.
        _active_blocks: Internal. Names of the blocks currently being
            expanded on this recursion path; callers should not pass it.

    Returns:
        None. Results are appended to ``all_sizes``.
    """
    if block is None:
        return
    if _active_blocks is None:
        _active_blocks = set()

    for entity in block:
        entity_type = entity.dxftype()
        if entity_type == 'DIMENSION':
            measurement = _dimension_measurement(entity)
            if measurement is not None:
                all_sizes.append(measurement)
        elif entity_type == 'INSERT':
            block_name = entity.dxf.name
            # A block that (directly or indirectly) inserts itself would
            # otherwise recurse until the interpreter's recursion limit.
            if block_name in _active_blocks:
                continue
            if block_name not in block_cache:
                block_cache[block_name] = block.doc.blocks.get(block_name)
            nested_block = block_cache[block_name]
            if nested_block is None:
                # INSERT refers to a block that is not defined.
                continue
            _active_blocks.add(block_name)
            try:
                extract_sizes_from_block(nested_block, all_sizes, _active_blocks)
            finally:
                _active_blocks.discard(block_name)


def extract_sizes_from_pdf(file_path):
    """Collect every number found in the text of a PDF file.

    Only the text returned by each page's ``get_text()`` is searched, and
    every integer or decimal number in it is treated as a size. This is a
    deliberately simple heuristic; real drawings may need a stricter
    pattern.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        A list of floats in reading order. If opening or reading fails, the
        error is printed and the values gathered so far (possibly none) are
        returned.
    """
    import re  # kept local so the function does not rely on a file-level import

    # Compiled once instead of being looked up again for every page.
    number_pattern = re.compile(r'\d+\.?\d*')

    all_sizes = []
    try:
        # The context manager closes the document even when a page fails;
        # the explicit close() used before was skipped on any exception.
        with fitz.open(file_path) as doc:
            for page in doc:
                text = page.get_text()
                all_sizes.extend(float(token) for token in number_pattern.findall(text))
    except Exception as e:
        print(f"处理 PDF 文件时出错: {e}")
    return all_sizes


@Gooey(program_name="DWG/DXF/PDF 尺寸汇总工具")
def main():
    """Read a DXF or PDF file and write the sizes found in it to a CSV file.

    The input and output paths come from the Gooey-generated dialog.
    DXF input is walked layout by layout via ``extract_sizes_from_block``;
    PDF input goes through ``extract_sizes_from_pdf``. The result is written
    as a single-column CSV (column name ``尺寸``).

    A missing input file or an unsupported extension is reported and nothing
    is written. A read error on a DXF/DWG file is reported and whatever was
    collected up to that point is still written.
    """
    parser = GooeyParser(description="读取 DWG、DXF 或 PDF 文档,将尺寸汇总到表格")
    parser.add_argument('file_path', widget='FileChooser', help='选择 DWG、DXF 或 PDF 文件')
    parser.add_argument('output_path', widget='FileSaver', help='选择输出的 CSV 文件路径', default='output.csv')
    args = parser.parse_args()

    file_path = args.file_path
    output_path = args.output_path

    if not os.path.exists(file_path):
        print(f"文件 {file_path} 不存在!")
        return

    all_sizes = []
    file_ext = os.path.splitext(file_path)[1].lower()
    if file_ext in ('.dwg', '.dxf'):
        # The cache is keyed by block name only, so drop anything left over
        # from a previously processed document.
        block_cache.clear()
        try:
            doc = ezdxf.readfile(file_path)

            # Model space first, then every other layout. doc.layouts also
            # yields the model space, so it is skipped in the loop; walking
            # it there again would record each of its sizes twice.
            msp = doc.modelspace()
            extract_sizes_from_block(msp, all_sizes)
            for layout in doc.layouts:
                if layout.name == msp.name:
                    continue
                extract_sizes_from_block(layout, all_sizes)
        except Exception as e:
            print(f"处理文件时出错: {e}")
            if file_ext == '.dwg':
                # ezdxf.readfile only loads DXF data; DWG needs converting first.
                print("提示: ezdxf 无法直接读取 DWG 文件,请先将其另存或转换为 DXF 格式。")
    elif file_ext == '.pdf':
        all_sizes = extract_sizes_from_pdf(file_path)
    else:
        # Previously this fell through and wrote an empty CSV while
        # reporting success.
        print(f"不支持的文件类型: {file_ext}")
        return

    df = pd.DataFrame({'尺寸': all_sizes})
    df.to_csv(output_path, index=False)
    print(f"尺寸已汇总到 {output_path}")


# Start the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
    
上一篇 SOP快速制作

相关文章

发表评论:

◎欢迎参与讨论,请在这里发表您的看法、交流您的观点。

回到首页 发表评论 回到顶部

遇到问题?请给我们留言

请填写您的电话号码,我们将回复您电话