PaddleOCR图片文字识别
使用PaddleOCR识别图片文字
Docker安装
Dockerfile
# Version: 2.3
# Builds an image that serves the PaddleOCR "ocr_system" module through PaddleHub Serving.
FROM registry.baidubce.com/paddlepaddle/paddle:2.2.0

# PaddleOCR is based on Python 3.7, so every install goes through pip3.7.
# Packages come from the Tsinghua PyPI mirror; --no-cache-dir keeps pip's
# download cache out of the image layers.
RUN pip3.7 install --no-cache-dir --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple
RUN pip3.7 install --no-cache-dir paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple

# Cloning the source from Gitee is faster than from GitHub inside mainland China.
# --depth 1 skips the git history, which is not needed at runtime.
# RUN git clone https://gitee.com/yarnk/bd-ocr-server.git /PaddleOCR
RUN git clone --depth 1 https://gitee.com/paddlepaddle/PaddleOCR.git /PaddleOCR
WORKDIR /PaddleOCR
RUN pip3.7 install --no-cache-dir -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
RUN mkdir -p /PaddleOCR/inference/

# NOTE(review): the clone above is not pinned to a revision, so confirm that
# det_model_dir / rec_model_dir / cls_model_dir in
# deploy/hubserving/ocr_system/params.py point at the directories extracted below.

# Download the OCR text detection model (PP-OCRv2). To use another detection model,
# change the archive URL and file name in the next two instructions and update
# det_model_dir in deploy/hubserving/ocr_system/params.py accordingly.
ADD https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/ch_PP-OCRv2_det_infer.tar -C /PaddleOCR/inference/

# Download the OCR text recognition model (PP-OCRv2). To use another recognition model,
# change the archive URL and file name in the next two instructions and update
# rec_model_dir in deploy/hubserving/ocr_system/params.py accordingly.
ADD https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/ch_PP-OCRv2_rec_infer.tar -C /PaddleOCR/inference/

# Download the text direction classifier (mobile v2.0). To use another classifier,
# change the archive URL and file name in the next two instructions and update
# cls_model_dir in deploy/hubserving/ocr_system/params.py accordingly.
ADD https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/ch_ppocr_mobile_v2.0_cls_infer.tar -C /PaddleOCR/inference/

# The CMD below starts the service without a port option, and clients call it on
# port 8866 (POST http://<host>:8866/predict/ocr_system), so document that port.
EXPOSE 8866

# Install the hub module at container start, then replace the shell with the
# server process (exec) so it receives the stop signal from `docker stop` directly.
CMD ["/bin/bash","-c","hub install deploy/hubserving/ocr_system/ && exec hub serving start -m ocr_system"]
本地安装
安装Paddle环境
- 首先根据平台安装对应的Paddle,如果有NVIDIA GPU则安装GPU版本的paddle。
- 官方文档链接(开始使用_飞桨-源于产业实践的开源深度学习平台 (paddlepaddle.org.cn))
- 安装paddlehub
pip install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple
模型安装
- 下载PaddleOCR源码
git clone https://gitee.com/paddlepaddle/PaddleOCR.git
- 下载模型
https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar
https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
- 将模型放到inference目录下,并解压。
mkdir -p /PaddleOCR/inference/
tar xf [模型名称]
启动服务
hub install deploy/hubserving/ocr_system/ && hub serving start -m ocr_system
请求接口
POST http://ip:8866/predict/ocr_system
body:
{
"images": [
"{{base64Img}}"
]
}
火车票文字提取
调用接口获取到json数据,保存为text.json
展开查看
{ "result": [ { "confidence": 0.9909937977790833, "text": "始发改签仅供报销使用", "text_region": [ [ 405, 299 ], [ 467, 298 ], [ 469, 958 ], [ 407, 958 ] ] }, { "confidence": 0.9509046077728271, "text": "¥67.0元", "text_region": [ [ 551, 302 ], [ 615, 303 ], [ 611, 549 ], [ 547, 548 ] ] }, { "confidence": 0.9296568036079407, "text": "2022年06月21日13:03开", "text_region": [ [ 624, 297 ], [ 691, 297 ], [ 693, 1020 ], [ 625, 1020 ] ] }, { "confidence": 0.965471088886261, "text": "19423300330622B009967 JM", "text_region": [ [ 98, 315 ], [ 153, 314 ], [ 160, 986 ], [ 106, 986 ] ] }, { "confidence": 0.9171962738037109, "text": "Z338009967", "text_region": [ [ 867, 313 ], [ 934, 313 ], [ 938, 764 ], [ 871, 764 ] ] }, { "confidence": 0.9896604418754578, "text": "合肥南站", "text_region": [ [ 764, 353 ], [ 849, 354 ], [ 843, 802 ], [ 758, 801 ] ] }, { "confidence": 0.957516610622406, "text": "Hefeinan", "text_region": [ [ 709, 419 ], [ 751, 420 ], [ 745, 647 ], [ 703, 646 ] ] }, { "confidence": 0.9709752798080444, "text": "买票请到12306 发货请到95306", "text_region": [ [ 243, 443 ], [ 303, 442 ], [ 315, 1142 ], [ 255, 1143 ] ] }, { "confidence": 0.8855549097061157, "text": "中国铁路祝您旅途愉快", "text_region": [ [ 182, 530 ], [ 238, 529 ], [ 242, 1059 ], [ 186, 1059 ] ] }, { "confidence": 0.9956666827201843, "text": "D2194", "text_region": [ [ 756, 814 ], [ 836, 814 ], [ 836, 1072 ], [ 756, 1072 ] ] }, { "confidence": 0.9773098230361938, "text": "09车15B号", "text_region": [ [ 631, 1120 ], [ 693, 1119 ], [ 695, 1427 ], [ 633, 1427 ] ] }, { "confidence": 0.997417688369751, "text": "南京南站", "text_region": [ [ 770, 1146 ], [ 850, 1147 ], [ 843, 1506 ], [ 763, 1504 ] ] }, { "confidence": 0.8791714906692505, "text": "Nanjingnan", "text_region": [ [ 704, 1178 ], [ 753, 1179 ], [ 747, 1474 ], [ 698, 1473 ] ] }, { "confidence": 0.9847577214241028, "text": "二等座", "text_region": [ [ 564, 1260 ], [ 620, 1260 ], [ 620, 1431 ], [ 564, 1431 ] ] }, { "confidence": 0.9356492161750793, "text": "检票:2B", "text_region": [ [ 867, 
1304 ], [ 929, 1304 ], [ 929, 1541 ], [ 867, 1541 ] ] } ] }
创建get_word.py
""" 规则读取json文件中的字段 """ import json import re def get_info(json_val): words_list = json_val["result"] lines = [] last_word = "" # 上一行的文字 last_word_y2 = 0 # 上一下的y轴最大坐标 last_word_h = 0 # 上一行字的高度 for words in words_list: txt = words["text"] y1, y2 = words["text_region"][0][1], words["text_region"][2][1] word_h = abs(y2 - y1) if word_h==0: continue # 本行字与上一行字之间的间隔 interval interval = abs(y1 - last_word_y2) # 判断本行与上一行是否在一个段落中 if last_word_h == 0 or (interval >= 0 and abs(word_h - last_word_h) < min((word_h, last_word_h)) / 2 and word_h * 0.45 > interval): last_word += txt else: lines.append(last_word) last_word = txt last_word_h = word_h last_word_y2 = y2 return lines if __name__ == '__main__': #文字识别保存的json文件 json_path = "text.json" with open(json_path, 'r', encoding='utf-8') as fp: json_val = json.load(fp) lines = get_info(json_val) for line in lines: print(line)
执行
python get_word.py