Skip to content

Commit 6abd528

Browse files
committed
fix(documentai-toolbox): resolve mypy errors
1 parent cccd359 commit 6abd528

9 files changed

Lines changed: 45 additions & 38 deletions

File tree

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
from .utilities import docai_utilities, gcs_utilities
2626
from .wrappers import document, entity, page
2727

28-
__all__ = (document, page, entity, converter, docai_utilities, gcs_utilities)
28+
__all__ = ("document", "page", "entity", "converter", "docai_utilities", "gcs_utilities")
2929

3030

3131
class Python37DeprecationWarning(DeprecationWarning): # pragma: NO COVER

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -190,7 +190,7 @@ def convert_bbox_to_docproto_bbox(block: Block) -> documentai.BoundingPoly:
190190
y_multiplier = 1.0
191191
normalized_vertices: List[documentai.NormalizedVertex] = []
192192

193-
if block.page_width and block.page_height:
193+
if block.page_width and block.page_height and block.docproto_width is not None and block.docproto_height is not None:
194194
x_multiplier = _get_multiplier(
195195
docproto_coordinate=block.docproto_width,
196196
external_coordinate=block.page_width,

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -17,12 +17,12 @@
1717
import dataclasses
1818
import json
1919
from types import SimpleNamespace
20-
from typing import List, Optional, Type
20+
from typing import Any, List, Optional, Type, cast
2121

2222
from google.cloud import documentai
2323

2424

25-
def _get_target_object(json_data: any, target_object: str) -> Optional[SimpleNamespace]:
25+
def _get_target_object(json_data: Any, target_object: str) -> Any:
2626
r"""Returns SimpleNamespace of target_object.
2727
2828
Args:
@@ -72,45 +72,45 @@ class Block:
7272
page_number:
7373
Optional.
7474
"""
75-
type_: SimpleNamespace = dataclasses.field(init=True, repr=False)
76-
text: SimpleNamespace = dataclasses.field(init=True, repr=False)
77-
bounding_box: Optional[SimpleNamespace] = dataclasses.field(
75+
type_: Any = dataclasses.field(init=True, repr=False)
76+
text: Any = dataclasses.field(init=True, repr=False)
77+
bounding_box: Any = dataclasses.field(
7878
init=True, repr=False, default=None
7979
)
80-
block_references: Optional[SimpleNamespace] = dataclasses.field(
80+
block_references: Any = dataclasses.field(
8181
init=True, repr=False, default=None
8282
)
83-
block_id: Optional[SimpleNamespace] = dataclasses.field(
83+
block_id: Any = dataclasses.field(
8484
init=False, repr=False, default=None
8585
)
86-
confidence: Optional[SimpleNamespace] = dataclasses.field(
86+
confidence: Any = dataclasses.field(
8787
init=False, repr=False, default=None
8888
)
89-
page_number: Optional[SimpleNamespace] = dataclasses.field(
89+
page_number: Any = dataclasses.field(
9090
init=False, repr=False, default=None
9191
)
92-
page_width: Optional[SimpleNamespace] = dataclasses.field(
92+
page_width: Any = dataclasses.field(
9393
init=False, repr=False, default=None
9494
)
95-
page_height: Optional[SimpleNamespace] = dataclasses.field(
95+
page_height: Any = dataclasses.field(
9696
init=False, repr=False, default=None
9797
)
98-
bounding_width: Optional[SimpleNamespace] = dataclasses.field(
98+
bounding_width: Any = dataclasses.field(
9999
init=False, repr=False, default=None
100100
)
101-
bounding_height: Optional[SimpleNamespace] = dataclasses.field(
101+
bounding_height: Any = dataclasses.field(
102102
init=False, repr=False, default=None
103103
)
104-
bounding_type: Optional[SimpleNamespace] = dataclasses.field(
104+
bounding_type: Any = dataclasses.field(
105105
init=False, repr=False, default=None
106106
)
107-
bounding_unit: Optional[SimpleNamespace] = dataclasses.field(
107+
bounding_unit: Any = dataclasses.field(
108108
init=False, repr=False, default=None
109109
)
110-
bounding_x: Optional[SimpleNamespace] = dataclasses.field(
110+
bounding_x: Any = dataclasses.field(
111111
init=False, repr=False, default=None
112112
)
113-
bounding_y: Optional[SimpleNamespace] = dataclasses.field(
113+
bounding_y: Any = dataclasses.field(
114114
init=False, repr=False, default=None
115115
)
116116
docproto_width: Optional[float] = dataclasses.field(
@@ -180,7 +180,7 @@ def load_blocks_from_schema(
180180

181181
blocks: List[Block] = []
182182
ens = _get_target_object(objects, entities)
183-
for i in ens:
183+
for i in cast(Any, ens):
184184
entity = i
185185

186186
block_text = ""
@@ -203,11 +203,11 @@ def load_blocks_from_schema(
203203
b = Block(
204204
type_=block_type,
205205
text=block_text,
206-
bounding_box=_get_target_object(entity, normalized_vertices),
206+
bounding_box=_get_target_object(entity, normalized_vertices) if normalized_vertices is not None else None,
207207
)
208208

209209
if id_:
210-
b.id_ = _get_target_object(entity, id_)
210+
b.block_id = _get_target_object(entity, id_)
211211
if confidence:
212212
b.confidence = _get_target_object(entity, confidence)
213213
if page_number and page_number in entity:

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/converter.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -424,7 +424,7 @@ def convert_from_config(
424424

425425
print("-------- Converting Started --------")
426426
files, labels, did_not_convert = _get_docproto_files(
427-
futures_list, project_id, location, processor_id
427+
list(futures_list), project_id, location, processor_id
428428
)
429429

430430
print("-------- Finished Converting --------")

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/vision_helpers.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -243,17 +243,17 @@ def _generate_entity_annotations(
243243
"""
244244
entity_annotations: List[EntityAnnotation] = []
245245
for token in page_info.page.tokens:
246-
v: vision.Vertex = []
246+
v: list[vision.Vertex] = []
247247
if token.layout.bounding_poly.vertices:
248248
for vertex in token.layout.bounding_poly.vertices:
249-
v.append({"x": int(vertex.x), "y": int(vertex.y)})
249+
v.append(vision.Vertex(x=int(vertex.x), y=int(vertex.y)))
250250
else:
251251
for normalized_vertex in token.layout.bounding_poly.normalized_vertices:
252252
v.append(
253-
{
254-
"x": int(normalized_vertex.x * page_info.page.dimension.width),
255-
"y": int(normalized_vertex.y * page_info.page.dimension.height),
256-
}
253+
vision.Vertex(
254+
x=int(normalized_vertex.x * page_info.page.dimension.width),
255+
y=int(normalized_vertex.y * page_info.page.dimension.height),
256+
)
257257
)
258258

259259
text_start_index = token.layout.text_anchor.text_segments[0].start_index

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020

2121
from google.api_core.gapic_v1 import client_info
2222

23-
from google.cloud import documentai, documentai_toolbox, storage
23+
from google.cloud import documentai, documentai_toolbox, storage # type: ignore[attr-defined]
2424
from google.cloud.documentai_toolbox import constants
2525

2626

@@ -91,6 +91,7 @@ def get_blobs(
9191
if gcs_uri:
9292
gcs_bucket_name, gcs_prefix = split_gcs_uri(gcs_uri)
9393

94+
assert gcs_prefix is not None
9495
if re.match(constants.FILE_CHECK_REGEX, gcs_prefix):
9596
raise ValueError("gcs_prefix cannot contain file types")
9697

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/document.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@
2222
import glob
2323
import os
2424
import re
25-
from typing import Dict, Iterator, List, Optional, Type, Union
25+
from typing import Any, Dict, Iterable, Iterator, List, Optional, Type, Union
2626

2727
from google.api_core.client_options import ClientOptions
2828
from google.api_core.operation import from_gapic as operation_from_gapic
@@ -51,7 +51,7 @@ def _document_layout_blocks_from_shards(
5151
shards: List[documentai.Document],
5252
) -> Iterator[documentai.Document.DocumentLayout.DocumentLayoutBlock]:
5353
def extract_blocks(
54-
blocks: List[documentai.Document.DocumentLayout.DocumentLayoutBlock],
54+
blocks: Iterable[documentai.Document.DocumentLayout.DocumentLayoutBlock],
5555
) -> Iterator[documentai.Document.DocumentLayout.DocumentLayoutBlock]:
5656
queue = collections.deque(blocks)
5757

@@ -325,8 +325,9 @@ def _dict_to_bigquery(
325325
bq_client = bigquery.Client(
326326
project=project_id, client_info=gcs_utilities._get_client_info()
327327
)
328+
resolved_project_id = project_id or bq_client.project
328329
table_ref = bigquery.DatasetReference(
329-
project=project_id, dataset_id=dataset_name
330+
project=resolved_project_id, dataset_id=dataset_name
330331
).table(table_name)
331332

332333
job_config = bigquery.LoadJobConfig(
@@ -345,7 +346,7 @@ def _dict_to_bigquery(
345346

346347

347348
def _apply_text_offset(
348-
documentai_object: Union[Dict[str, Dict], List], text_offset: int
349+
documentai_object: Union[Dict[str, Any], List[Any]], text_offset: int
349350
) -> None:
350351
r"""Applies a text offset to all text_segments in `documentai_object`.
351352

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/entity.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,10 @@ class Entity:
6363

6464
_image: Optional[Image.Image] = dataclasses.field(init=False, default=None)
6565

66-
def __post_init__(self, page_offset: int) -> None:
66+
def __post_init__(self, page_offset: Optional[int]) -> None:
67+
if page_offset is None:
68+
page_offset = 0
69+
6770
self.type_ = self.documentai_object.type_
6871

6972
if self.documentai_object.mention_text:

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/page.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,9 @@
1818
from abc import ABC
1919
import dataclasses
2020
from functools import cached_property
21-
from typing import Iterable, List, Optional, Type
21+
from typing import Iterable, List, Optional, Type, TypeVar
22+
23+
T = TypeVar("T", bound="_BasePageElement")
2224

2325
import pandas as pd
2426

@@ -181,8 +183,8 @@ def _text_segment(self) -> documentai.Document.TextAnchor.TextSegment:
181183
return self.documentai_object.layout.text_anchor.text_segments[0]
182184

183185
def _get_children_of_element(
184-
self, potential_children: List["_BasePageElement"]
185-
) -> List["_BasePageElement"]:
186+
self, potential_children: List[T]
187+
) -> List[T]:
186188
"""
187189
Filters potential child elements to identify only those fully contained within this element.
188190

0 commit comments

Comments
 (0)