Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

migration party fixes labels #330

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 70 additions & 17 deletions ebl/fragmentarium/application/annotations_service.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import functools
from io import BytesIO
from typing import Tuple, Sequence

import attr
import pydash
from PIL import Image

from ebl.changelog import Changelog
from ebl.corpus.domain.line import Line
from ebl.ebl_ai_client import EblAiClient
from ebl.files.application.file_repository import FileRepository
from ebl.fragmentarium.application.annotations_repository import AnnotationsRepository
Expand All @@ -19,9 +22,12 @@
Annotations,
AnnotationValueType,
)

from ebl.transliteration.domain.at_line import ObjectAtLine, SurfaceAtLine, ColumnAtLine
from ebl.transliteration.domain.line_label import LineLabel
from ebl.transliteration.domain.line_number import LineNumber, AbstractLineNumber, \
LineNumberRange
from ebl.transliteration.domain.museum_number import MuseumNumber
from ebl.transliteration.domain.text_line import TextLine
from ebl.users.domain.user import User

# Setting MAX_IMAGE_PIXELS to None switches off Pillow's decompression-bomb
# size check, so images of any pixel count can be opened by this module.
Image.MAX_IMAGE_PIXELS = None # pyre-ignore[9]
Expand All @@ -48,17 +54,6 @@ def generate_annotations(
def find(self, number: MuseumNumber) -> Annotations:
    """Return what the annotations repository holds for museum number *number*.

    The lookup is delegated unchanged to
    ``self._annotations_repository.query_by_museum_number``.
    """
    repository = self._annotations_repository
    return repository.query_by_museum_number(number)

def _label_by_line_number(
self, line_number_to_match: int, labels: Sequence[LineLabel]
) -> str:
matching_label = None
for label in labels:
label_line_number = label.line_number
if label_line_number and label_line_number.is_matching_number(
line_number_to_match
):
matching_label = label
return matching_label.formatted_label if matching_label else ""

def _cropped_image_from_annotations_helper(
self,
Expand All @@ -71,11 +66,7 @@ def _cropped_image_from_annotations_helper(
updated_cropped_annotations = []

for annotation in annotations.annotations:
label = (
self._label_by_line_number(annotation.data.path[0], labels)
if annotation.data.type != AnnotationValueType.BLANK
else ""
)
label = labels[annotation.data.path[0]].formatted_label if annotation.data.type != AnnotationValueType.BLANK else ""
cropped_image = annotation.crop_image(image)
cropped_sign_image = CroppedSignImage.create(cropped_image)
cropped_sign_images.append(cropped_sign_image)
Expand Down Expand Up @@ -109,6 +100,7 @@ def _cropped_image_from_annotations(
annotations, image, fragment.script, fragment.text.labels
)


def update(self, annotations: Annotations, user: User) -> Annotations:
old_annotations = self._annotations_repository.query_by_museum_number(
annotations.fragment_number
Expand All @@ -130,3 +122,64 @@ def update(self, annotations: Annotations, user: User) -> Annotations:
{"_id": _id, **schema.dump(annotations_with_image_ids)},
)
return annotations_with_image_ids

def get_start_number(self, line_number: AbstractLineNumber):
    """Return the ``number`` at which *line_number* begins.

    For a ``LineNumberRange`` this is the ``number`` of its ``start``;
    for any other line number it is the object's own ``number``.
    """
    first = (
        line_number.start
        if isinstance(line_number, LineNumberRange)
        else line_number
    )
    return first.number

def get_end_number(self, line_number: AbstractLineNumber):
    """Return the ``number`` at which *line_number* ends.

    For a ``LineNumberRange`` this is the ``number`` of its ``end``;
    for any other line number it is the object's own ``number``.
    """
    last = (
        line_number.end
        if isinstance(line_number, LineNumberRange)
        else line_number
    )
    return last.number


# Recompute the label stored on each annotation's cropped sign from the
# fragment's current text and return a copy of `annotations` carrying the
# rebuilt CroppedSign entries. Only the fragments repository is read here;
# nothing is written back by this method.
#
# NOTE(review): indentation was lost in this view, so the nesting of the
# `break` / `append` statements in the first loop and the pairing of the
# final `else` cannot be determined from here -- confirm against the real
# file before changing any logic.
def _migrate_and_fix_labels(
self, annotations: Annotations
) -> Annotations:
fragment = self._fragments_repository.query_by_museum_number(
annotations.fragment_number
)
text = fragment.text
# `labels` is filled with LineLabel-or-None entries and is later indexed by
# annotation.data.path[0].
labels = []
# NOTE(review): the range stops at len(text.lines) - 1, so the last line
# index is never visited -- confirm this is intended.
for i in range(len(text.lines) - 1):
if i + 1 < len(text.labels):
label_first, label_second = text.labels[i], text.labels[i+1]
# Compare where the next label starts with where the current one ends.
if self.get_start_number(label_second.line_number) > self.get_end_number(label_first.line_number):
line_number = self.get_start_number(label_second.line_number)
# True when the current label does not match the number directly before
# the next label's start.
if not label_first.line_number.is_matching_number(line_number - 1):
labels.extend([label_first, None])
break
labels.append(label_first)
break
labels.append(None)

script = fragment.script
updated_cropped_annotations = []
for annotation in annotations.annotations:
# NOTE(review): `break` ends the whole loop, so every annotation after the
# first one with an empty path is left out of the returned Annotations;
# `continue` (keeping the annotation) may have been intended.
if len(annotation.data.path) == 0:
break
annotation_path = annotation.data.path[0]
# Only annotations that already have a cropped sign and whose index falls
# inside `labels` are candidates for relabelling.
if annotation.cropped_sign and len(labels) - 1 >= annotation_path:
# A None entry in `labels` yields an empty label.
label = labels[annotation_path].formatted_label if labels[annotation_path] else ""
if annotation.cropped_sign.label != label:
# Label changes are reported on stdout.
# NOTE(review): debugging aid -- consider logging instead of print.
print(f"{fragment.number} -- {annotation.data.sign_name} --- {annotation.cropped_sign.label}-------{label}")
# The rebuilt CroppedSign keeps the existing image id and takes the script
# from the fragment.
updated_cropped_annotation = attr.evolve(
annotation,
cropped_sign=CroppedSign(
annotation.cropped_sign.image_id,
script,
label,
),
)
updated_cropped_annotations.append(updated_cropped_annotation)
else:
updated_cropped_annotations.append(annotation)
return attr.evolve(annotations, annotations=updated_cropped_annotations)

def migrate(self, annotations: Annotations) -> Annotations:
    """Relabel *annotations*, persist the result and return it.

    The relabelled annotations come from ``self._migrate_and_fix_labels``
    and are handed to ``self._annotations_repository.create_or_update``
    before being returned to the caller.
    """
    relabelled = self._migrate_and_fix_labels(annotations)
    repository = self._annotations_repository
    repository.create_or_update(relabelled)
    return relabelled
Loading