当前位置: 首页 > news > 正文

文字识别准确率

import easyocr
import cv2
import os
import numpy as np

# Languages passed to easyocr.Reader (Simplified Chinese + English).
OCR_LANGUAGES = ['ch_sim', 'en']

# Sample image that main() processes when the caller supplies no path.
DEFAULT_IMAGE_PATH = r"C:\Users\18306\Desktop\picture\test_image.jpg"

# Reader instance shared by every call; created on first use by _get_reader().
_reader = None


def _get_reader():
    """Return the shared EasyOCR reader, creating it on first use.

    Keeping a single instance means a batch run constructs the reader once
    instead of once per image.
    """
    global _reader
    if _reader is None:
        _reader = easyocr.Reader(OCR_LANGUAGES)
    return _reader


def calculate_ocr_accuracy(image_path, ground_truth_text=None):
    """Run OCR on one image, print a report, and return the results.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.
        ground_truth_text: Expected text. When it is not ``None`` (an empty
            string is a valid expectation), the recognized text is scored
            against it with ``calculate_text_accuracy``.

    Returns:
        A dict with ``recognized_text``, ``avg_confidence`` and ``details``
        (the raw ``readtext`` results); when a ground truth was given it also
        carries ``ground_truth`` and ``accuracy``. Returns ``None`` if
        reading or recognizing the image failed.
    """
    # Built outside the try block, as before: a failure to construct the
    # reader propagates to the caller instead of being reported per image.
    reader = _get_reader()
    try:
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"无法读取图片: {image_path}")

        results = reader.readtext(image)
        recognized_text = " ".join(text for _, text, _ in results).strip()
        confidence_scores = [confidence for _, _, confidence in results]

        print("=" * 50)
        print("OCR识别结果:")
        print("=" * 50)
        for index, (_, text, confidence) in enumerate(results, start=1):
            print(f"文本块 {index}: '{text}' (置信度: {confidence:.4f})")
        print(f"\n完整识别文本: {recognized_text}")

        # Always a plain float, including the "nothing recognized" case.
        if confidence_scores:
            avg_confidence = float(np.mean(confidence_scores))
        else:
            avg_confidence = 0.0
        print(f"\n平均置信度: {avg_confidence:.4f}")

        # `is not None` rather than truthiness: an empty ground truth
        # ("this image contains no text") must still be scored.
        if ground_truth_text is not None:
            accuracy = calculate_text_accuracy(ground_truth_text, recognized_text)
            print(f"文本准确率: {accuracy:.2f}%")
            return {
                'recognized_text': recognized_text,
                'ground_truth': ground_truth_text,
                'accuracy': accuracy,
                'avg_confidence': avg_confidence,
                'details': results,
            }
        return {
            'recognized_text': recognized_text,
            'avg_confidence': avg_confidence,
            'details': results,
        }
    except Exception as e:
        # Deliberate best-effort boundary: batch_ocr_accuracy_test relies on
        # a None result to skip an image that could not be processed.
        print(f"处理图片时出错: {e}")
        return None


def calculate_text_accuracy(ground_truth, recognized):
    """Return the similarity of two texts as a percentage in [0, 100].

    Both texts are compared with all whitespace removed and lowercased. The
    score is ``(1 - edit_distance / longer_length) * 100``; two empty texts
    score 100.0.
    """
    gt_clean = ''.join(ground_truth.split()).lower()
    rec_clean = ''.join(recognized.split()).lower()
    max_len = max(len(gt_clean), len(rec_clean))
    if max_len == 0:
        return 100.0
    distance = levenshtein_distance(gt_clean, rec_clean)
    return (1 - distance / max_len) * 100


def levenshtein_distance(s1, s2):
    """Return the Levenshtein (edit) distance between ``s1`` and ``s2``.

    Counts the minimum number of single-character insertions, deletions and
    substitutions that turn one string into the other.
    """
    # Keep the shorter string as the row so each row is as small as possible.
    if len(s1) < len(s2):
        s1, s2 = s2, s1
    if not s2:
        return len(s1)

    previous_row = list(range(len(s2) + 1))
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row
    return previous_row[-1]


def batch_ocr_accuracy_test(image_folder, ground_truths):
    """Score every image listed in ``ground_truths`` and print a summary.

    Args:
        image_folder: Directory that contains the image files.
        ground_truths: Mapping of image file name to its expected text.

    Files that do not exist are reported and skipped. Only images for which
    an accuracy was produced count towards the printed averages.
    """
    total_accuracy = 0
    total_confidence = 0
    count = 0

    print("开始批量OCR准确率测试...")
    print("=" * 60)

    for filename, truth_text in ground_truths.items():
        image_path = os.path.join(image_folder, filename)
        if not os.path.exists(image_path):
            # Report the skip so a mistyped file name does not silently
            # shrink the test set.
            print(f"跳过: 图片文件不存在 - {image_path}")
            continue

        print(f"\n处理图片: {filename}")
        result = calculate_ocr_accuracy(image_path, truth_text)
        if result and 'accuracy' in result:
            total_accuracy += result['accuracy']
            total_confidence += result['avg_confidence']
            count += 1

    if count > 0:
        print("\n" + "=" * 60)
        print("批量测试结果汇总:")
        print(f"测试图片数量: {count}")
        print(f"平均准确率: {total_accuracy/count:.2f}%")
        print(f"平均置信度: {total_confidence/count:.4f}")
        print("=" * 60)
    else:
        print("没有可用于统计的图片。")


def main(image_path=DEFAULT_IMAGE_PATH):
    """Run OCR on a single image and print whether it succeeded.

    Args:
        image_path: Image to process; defaults to ``DEFAULT_IMAGE_PATH``.
    """
    if not os.path.exists(image_path):
        print(f"错误: 图片文件不存在 - {image_path}")
        return

    print("开始OCR识别...")
    result = calculate_ocr_accuracy(image_path)
    if result:
        print("\n识别完成!")
    else:
        print("识别失败!")


if __name__ == "__main__":
    main()

image

http://www.gsyq.cn/news/48988.html

相关文章:

  • logstash配置和启动
  • 2025年广东军事化训练学校/机构最新TOP5权威评测:铸就坚毅品格,领航成长之路
  • 2025年广东青少年感恩教育学校/机构最新TOP5推荐:家庭教育、心理健康,科学评测
  • 2025广东法制教育机构/学校最新TOP5评测:心理健康、素质拓展、行为矫正全覆盖
  • 2025年贵州贵阳母婴护理机构最新TOP5评测:守护母婴健康的专业力量
  • 使用 vLLM 本地部署 Qwen3-Embedding-8B 模型并接入 Dify 完整指南 - yi
  • 《VS Code:高效编程的插件与配置》
  • 10.26 NOTE
  • 2025年共享仓库服务最新TOP5推荐:山东、河北、江浙沪等国内区域,中亚、阿富汗、俄罗斯等国际地区,高效仓储解决方案引领者
  • 在ec2上部署CosyVoice2模型
  • 每日反思(2025_11_13)
  • 2025年运输服务企业最新TOP5评测:国内、跨境物流解决方案引领者
  • 疲劳数据分析与设计曲线 25
  • 【AI翻译】分布式系统中的心跳机制
  • “ArcGIS Pro制图-模型构建器-ArcPy开发-AI-无人机实操”系列培训班预告
  • 控制领域常用希腊字母表
  • DNS record types: AAAA vs AA All In One
  • JVM之锁优化(自旋锁 适应性自旋 锁消除 锁粗化 轻量级锁 偏向锁) - 教程
  • 面试官问:什么是Java内存模型? - 教程
  • leetcode6. Z 字形变换
  • .NET Conf China 2025:讲师与主题全揭秘
  • 深入解析:洞穴人的仰望:洞穴人隐喻与进步主义的歧途
  • 《JIRA:项目管理与敏捷开发实践》
  • 2025年西北数字人厂商最新TOP5评测:引领陕西甘肃区域智能交互新生态
  • PLC与单片机区
  • 污染控制化学及工程考点背诵手册
  • 杂记 - 4
  • LeetCode 面试经典 150_栈_简化路径(53_71_C++_中等)(栈+stringstream) - 实践
  • 污染控制化学及工程知识点整理
  • 2025.11.13模拟赛