Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 67 additions & 1 deletion src/module/DocumentAI/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,81 @@ import { makeFormData } from 'koajax';
import { BaseModel, RESTClient, toggle } from 'mobx-restful';

import { LarkData } from '../../type';
import { TaxiInvoice, TrainInvoice, VatInvoice, VehicleInvoice } from './type';
import {
BankCard,
ContractFieldExtraction,
ContractOCRMode,
Resume,
TaxiInvoice,
TrainInvoice,
VatInvoice,
VehicleInvoice
} from './type';

export * from './type';

export abstract class DocumentAIModel extends BaseModel {
baseURI = 'document_ai/v1';
ocrBaseURI = 'optical_char_recognition/v1';

abstract client: RESTClient;

/**
 * Sends an image to the basic OCR endpoint and returns the recognized text.
 *
 * POSTs `{ image }` to `image/basic_recognize` under `ocrBaseURI` and unwraps
 * `data.text_list` from the response body.
 *
 * @param image - value forwarded unchanged as the `image` field of the request
 *   body (presumably Base64-encoded image data — confirm against the API reference)
 * @returns the `text_list` array carried in the response data
 * @see {@link https://open.feishu.cn/document/server-docs/ai/optical_char_recognition-v1/basic_recognize}
 */
@toggle('uploading')
async recognizeImageText(image: string) {
  const endpoint = `${this.ocrBaseURI}/image/basic_recognize`;
  const response = await this.client.post<LarkData<{ text_list: string[] }>>(
    endpoint,
    { image }
  );
  // The response body and its `data` are asserted non-null, as elsewhere in this class.
  const { text_list } = response.body!.data!;

  return text_list;
}

/**
 * Uploads a file to the bank card recognition endpoint.
 *
 * The file is wrapped into form data under the `file` key and POSTed to
 * `bank_card/recognize` under `baseURI`.
 *
 * @param file - the file to recognize
 * @returns the `bank_card` object carried in the response data
 * @see {@link https://open.feishu.cn/document/ai/document_ai-v1/bank_card/recognize}
 */
@toggle('uploading')
async recognizeBankCard(file: File) {
  const payload = makeFormData({ file });
  const response = await this.client.post<LarkData<{ bank_card: BankCard }>>(
    `${this.baseURI}/bank_card/recognize`,
    payload
  );
  // The response body and its `data` are asserted non-null, as elsewhere in this class.
  const { bank_card } = response.body!.data!;

  return bank_card;
}

/**
 * Uploads a file to the resume parsing endpoint.
 *
 * The file is wrapped into form data under the `file` key and POSTed to
 * `resume/parse` under `baseURI`.
 *
 * @param file - the resume file to parse
 * @returns the `resumes` array carried in the response data
 * @see {@link https://open.feishu.cn/document/ai/document_ai-v1/resume/parse}
 */
@toggle('uploading')
async parseResumes(file: File) {
  const payload = makeFormData({ file });
  const response = await this.client.post<LarkData<{ resumes: Resume[] }>>(
    `${this.baseURI}/resume/parse`,
    payload
  );
  // The response body and its `data` are asserted non-null, as elsewhere in this class.
  const { resumes } = response.body!.data!;

  return resumes;
}

/**
 * Uploads a contract file to the field extraction endpoint.
 *
 * `file`, `pdf_page_limit` and `ocr_mode` are packed into one form-data
 * payload and POSTed to `contract/field_extraction` under `baseURI`.
 *
 * @param file - the contract file to analyse
 * @param pdf_page_limit - forwarded as the `pdf_page_limit` form field; defaults to `100`
 * @param ocr_mode - forwarded as the `ocr_mode` form field; defaults to `'auto'`
 * @returns the whole `data` object of the response
 * @see {@link https://open.feishu.cn/document/server-docs/ai/document_ai-v1/contract/field_extraction}
 */
@toggle('uploading')
async extractContractFields(
  file: File,
  pdf_page_limit = 100,
  ocr_mode: ContractOCRMode = 'auto'
) {
  const endpoint = `${this.baseURI}/contract/field_extraction`;
  const payload = makeFormData({ file, pdf_page_limit, ocr_mode });
  const response = await this.client.post<LarkData<ContractFieldExtraction>>(
    endpoint,
    payload
  );

  // The response body and its `data` are asserted non-null, as elsewhere in this class.
  return response.body!.data!;
}

/**
* @see {@link https://open.feishu.cn/document/ai/document_ai-v1/vat_invoice/recognize}
*/
Expand Down
178 changes: 178 additions & 0 deletions src/module/DocumentAI/type.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
/**
 * A single recognized entity: a `type` tag paired with its string `value`.
 *
 * @typeParam T - the set of allowed `type` tags; defaults to any `string`
 */
export interface DocumentAIEntity<T extends string = string> {
type: T;
value: string;
}

/**
 * Entity type tags built from template literal types. Expands to
 * `invoice_code`, `invoice_no`, `invoice_special_seal`,
 * `seller_name_in_seal` and `seller_taxpayer_no_in_seal`.
 */
export type InvoiceEntityType =
| `invoice_${'code' | 'no' | 'special_seal'}`
| `seller_${'name' | 'taxpayer_no'}_in_seal`;
Expand Down Expand Up @@ -67,3 +72,176 @@ export interface VehicleInvoice {
| `total_price${'' | '_little'}`;
value: string;
}

/** Entity type tags that may appear in a {@link BankCard}'s `entities` list. */
export type BankCardEntityType = 'card_number' | 'date_of_expiry';

/**
 * Bank card recognition result: a list of entities whose `type` is limited
 * to {@link BankCardEntityType}.
 */
export interface BankCard {
entities: DocumentAIEntity<BankCardEntityType>[];
}

/**
 * One education entry of a {@link Resume}.
 *
 * NOTE(review): both `*_date` and `*_time` string fields exist for the start
 * and end of the period; how their formats differ is not visible here —
 * confirm against the API reference.
 */
export interface ResumeEducation {
school: string;
start_date: string;
start_time: string;
end_date: string;
end_time: string;
major: string;
degree: string;
// Numeric code; presumably an enumeration of qualification levels — TODO confirm.
qualification: number;
}

/**
 * One work-experience entry of a {@link Resume}.
 *
 * NOTE(review): both `*_date` and `*_time` string fields exist for the start
 * and end of the period; how their formats differ is not visible here —
 * confirm against the API reference.
 */
export interface ResumeCareer {
company: string;
start_date: string;
start_time: string;
end_date: string;
end_time: string;
title: string;
// Numeric code; presumably `type_str` is its textual form — TODO confirm.
type: number;
type_str: string;
job_description: string;
}

/**
 * One project entry of a {@link Resume}.
 *
 * NOTE(review): both `*_date` and `*_time` string fields exist for the start
 * and end of the period; how their formats differ is not visible here —
 * confirm against the API reference.
 */
export interface ResumeProject {
name: string;
title: string;
start_date: string;
start_time: string;
end_date: string;
end_time: string;
description: string;
}

/** One language entry of a {@link Resume}. */
export interface ResumeLanguage {
// Numeric code; presumably a proficiency level — TODO confirm the value range.
level: number;
description: string;
}

/** One award entry of a {@link Resume}; all three fields are strings. */
export interface ResumeAward {
award: string;
date: string;
description: string;
}

/**
 * One certificate entry of a {@link Resume}.
 * Unlike the other resume entry types, the descriptive field is named `desc`.
 */
export interface ResumeCertificate {
name: string;
desc: string;
}

/**
 * One competition entry of a {@link Resume}.
 * Unlike the other resume entry types, the descriptive field is named `desc`.
 */
export interface ResumeCompetition {
name: string;
desc: string;
}

/**
 * One parsed resume. The `resume/parse` response handled in `./index.ts`
 * carries an array of these under `resumes`.
 */
export interface Resume {
file_md5: string;
// NOTE(review): how `content` and `new_content` differ is not visible here — confirm.
content: string;
new_content: string;
name: string;
email: string;
mobile: string;
mobile_is_virtual: boolean;
country_code: string;
educations: ResumeEducation[];
careers: ResumeCareer[];
projects: ResumeProject[];
// Presumably counted in years — TODO confirm the unit.
work_year: number;
date_of_birth: string;
// Numeric code; the mapping of values is not visible here — TODO confirm.
gender: number;
willing_positions: string[];
current_location: string;
willing_locations: string[];
home_location: string;
languages: ResumeLanguage[];
awards: ResumeAward[];
certificates: ResumeCertificate[];
competitions: ResumeCompetition[];
self_evaluation: string;
urls: string[];
social_links: string[];
}

/**
 * Allowed values of the `ocr_mode` form field sent to
 * `contract/field_extraction`; the caller in `./index.ts` defaults to `'auto'`.
 */
export type ContractOCRMode = 'force' | 'auto' | 'unused';

/**
 * Price section of a {@link ContractFieldExtraction}: a numeric
 * `contract_price` alongside two string fields.
 *
 * NOTE(review): presumably `contract_price_original` and `text` hold the
 * price as written in the document — confirm against the API reference.
 */
export interface ContractExtractPrice {
contract_price: number;
contract_price_original: string;
text: string;
}

/**
 * Initial-term value used by {@link ContractExtractTime}: a string amount
 * (`initial_time`) and a string unit (`initial_unit`).
 */
export interface ContractExtractTerm {
initial_time: string;
initial_unit: string;
}

/**
 * Time section of a {@link ContractFieldExtraction}.
 *
 * Start and end each appear in three string forms (`time_*`,
 * `original_time_*`, `text_*`), and the initial term appears both as a
 * structured {@link ContractExtractTerm} and as a string.
 *
 * NOTE(review): presumably the `original_*` / `text_*` variants preserve the
 * wording found in the document — confirm against the API reference.
 */
export interface ContractExtractTime {
time_start: string;
time_end: string;
original_time_start: string;
original_time_end: string;
text_start: string;
text_end: string;
initial_term: ContractExtractTerm;
text_initial_term: string;
}

/**
 * Copy section of a {@link ContractFieldExtraction}: a numeric `copy_num`
 * alongside three string fields.
 *
 * NOTE(review): presumably describes how many copies of the contract exist —
 * confirm the meaning of `key` and `original_copy` against the API reference.
 */
export interface ContractExtractCopy {
copy_num: number;
original_copy: string;
key: string;
text: string;
}

/**
 * Currency section of a {@link ContractFieldExtraction}; both fields are strings.
 */
export interface ContractExtractCurrency {
currency_name: string;
currency_text: string;
}

/**
 * Details of one contract party, used as the `value` of a
 * {@link ContractBodyInfo}. Every field is a string.
 */
export interface ContractBodyEntity {
address: string;
contacts: string;
email: string;
phone: string;
id_number: string;
legal_representative: string;
party: string;
}

/**
 * Role tag of a contract party; used as {@link ContractBodyInfo}'s
 * `body_type` and as the base of {@link ContractBankInfo}'s `bank_type`.
 */
export type ContractPartyType = 'buy' | 'sell' | 'third';

/**
 * One entry of {@link ContractFieldExtraction}'s `body_info`: a party role
 * tag paired with that party's details.
 */
export interface ContractBodyInfo {
body_type: ContractPartyType;
value: ContractBodyEntity;
}

/**
 * Bank account details used as the `value` of a {@link ContractBankInfo}.
 * Every field is a string.
 */
export interface ContractBankEntity {
account_name: string;
bank_name: string;
account_number: string;
phone: string;
contacts: string;
tax_number: string;
address: string;
id_number: string;
email: string;
}

/**
 * One entry of {@link ContractFieldExtraction}'s `bank_info`: a tag saying
 * whose bank account it is, paired with the account details.
 *
 * `bank_type` accepts the bare {@link ContractPartyType} values, the same
 * values suffixed with `_bank`, and two "uncertain" literals.
 */
export interface ContractBankInfo {
bank_type:
| ContractPartyType
| `${ContractPartyType}_bank`
| 'uncertain_bank'
// NOTE(review): looks like a misspelling of 'uncertain_bank'; presumably kept
// because the API emits or documents this spelling — confirm before removing.
| 'unceratin_bank';
value: ContractBankEntity;
}

/**
 * Response data of the `contract/field_extraction` request issued in
 * `./index.ts`: single price/time/copy/currency sections, a string `header`,
 * and lists of party and bank information.
 */
export interface ContractFieldExtraction {
file_id: string;
price: ContractExtractPrice;
time: ContractExtractTime;
copy: ContractExtractCopy;
currency: ContractExtractCurrency;
header: string;
body_info: ContractBodyInfo[];
bank_info: ContractBankInfo[];
}