|
12 | 12 | import math
|
13 | 13 | import re
|
14 | 14 | import xml.etree.ElementTree as ET
|
| 15 | +from datetime import timedelta |
15 | 16 |
|
16 | 17 | IMG_CTX = 'http://iiif.io/api/image/2/context.json'
|
17 | 18 | PRZ_CTX = 'http://iiif.io/api/presentation/2/context.json'
|
@@ -658,77 +659,131 @@ def create_manifest3(identifier, domain=None, page=None):
|
658 | 659 | vttfiles[sourceFilename] = []
|
659 | 660 |
|
660 | 661 | vttfiles[sourceFilename].append(f)
|
661 |
| - |
662 |
| - # create the canvases for each original |
663 |
| - for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']]: |
664 |
| - normalised_id = file['name'].rsplit(".", 1)[0] |
| 662 | + |
| 663 | + if 'access-restricted-item' in metadata['metadata'] and metadata['metadata']['access-restricted-item']: |
| 664 | + # this is a news item so has to be treated differently |
| 665 | + # https://ia801803.us.archive.org/29/items/CSPAN3_20180217_164800_Poplar_Forest_Archaeology/CSPAN3_20180217_164800_Poplar_Forest_Archaeology.mp4?start=0&end=360&ignore=x.mp4&cnt=0 |
| 666 | + mp4File = None |
| 667 | + duration = 0.0 |
| 668 | + filedata = None |
| 669 | + for file in metadata['files']: |
| 670 | + if file['name'].endswith('.mp4'): |
| 671 | + mp4File = file['name'] |
| 672 | + duration = float(file['length']) |
| 673 | + filedata = file |
| 674 | + |
| 675 | + normalised_id = mp4File.rsplit(".", 1)[0] |
665 | 676 | slugged_id = normalised_id.replace(" ", "-")
|
666 | 677 | c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas"
|
667 |
| - c = Canvas(id=c_id, label=normalised_id, duration=float(file['length']), height=int(file['height']), width=int(file['width'])) |
668 |
| - |
669 |
| - # Add vtt if present |
670 |
| - if vttfiles and normalised_id in vttfiles: |
671 |
| - vttAPId = f"{URI_PRIFIX}/{identifier}/{slugged_id}/vtt" |
672 |
| - |
673 |
| - vttNo = 1 |
674 |
| - for vttFile in vttfiles[normalised_id]: |
675 |
| - vtAnno = c.make_annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation/vtt/{vttNo}", |
676 |
| - motivation="supplementing", |
677 |
| - target=c.id, |
678 |
| - anno_page_id=vttAPId, |
679 |
| - body={"id": f"{domain}resource/{identifier}/{vttFile['name']}", |
680 |
| - "type": "Text", |
681 |
| - "format": "text/vtt", |
682 |
| - }) |
683 |
| - # add label and language |
684 |
| - if vttFile['name'].endswith("autogenerated.vtt"): |
685 |
| - vtAnno.body.label = { 'en': ['autogenerated']} |
686 |
| - else: |
687 |
| - # Assume language |
688 |
| - splitName = vttFile['name'].split(".") |
689 |
| - lang = splitName[-2] |
690 |
| - vtAnno.body.add_label(lang, language="none") |
691 |
| - vtAnno.body.language = lang |
692 |
| - |
693 |
| - vttNo += 1 |
694 |
| - |
695 |
| - # create intermediary objects |
| 678 | + c = Canvas(id=c_id, label=normalised_id, duration=duration, height=int(filedata['height']), width=int(filedata['width'])) |
696 | 679 | ap = AnnotationPage(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/page")
|
697 |
| - anno = Annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation", motivation="painting", target=c.id) |
698 | 680 |
|
699 |
| - # create body based on whether there are derivatives or not: |
700 |
| - if file['name'] in derivatives: |
701 |
| - body = Choice(items=[]) |
702 |
| - # add the choices in order per https://github.com/ArchiveLabs/iiif.archivelab.org/issues/77#issuecomment-1499672734 |
703 |
| - for format in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']: |
704 |
| - if format in derivatives[file['name']]: |
705 |
| - r = ResourceItem(id=f"https://archive.org/download/{identifier}/{derivatives[file['name']][format]['name'].replace(' ', '%20')}", |
706 |
| - type='Video', |
707 |
| - format=to_mimetype(format), |
708 |
| - label={"none": [format]}, |
709 |
| - duration=float(file['length']), |
710 |
| - height=int(file['height']), |
711 |
| - width=int(file['width']), |
712 |
| - ) |
713 |
| - body.items.append(r) |
714 |
| - elif file['format'] == format: |
715 |
| - r = ResourceItem( |
716 |
| - id=f"https://archive.org/download/{identifier}/{file['name'].replace(' ', '%20')}", |
717 |
| - type='Video', |
718 |
| - format=to_mimetype(format), |
719 |
| - label={"none": [format]}, |
720 |
| - duration=float(file['length']), |
721 |
| - height=int(file['height']), |
722 |
| - width=int(file['width'])) |
723 |
| - body.items.append(r) |
724 |
| - else: |
725 |
| - # todo: deal with instances where there are no derivatives for whatever reason |
726 |
| - pass |
727 |
| - |
728 |
| - anno.body = body |
729 |
| - ap.add_item(anno) |
| 681 | + vttAPId = f"{URI_PRIFIX}/{identifier}/{slugged_id}/vtt" |
| 682 | + vtAnno = c.make_annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation/vtt/streamed", |
| 683 | + motivation="supplementing", |
| 684 | + target=c.id, |
| 685 | + anno_page_id=vttAPId, |
| 686 | + body={"id": f"{domain}vtt/streaming/{identifier}.vtt", |
| 687 | + "type": "Text", |
| 688 | + "format": "text/vtt", |
| 689 | + }) |
| 690 | + |
| 691 | + segments = math.floor(duration / 60) |
| 692 | + for i in range(segments): |
| 693 | + start = i * 60 |
| 694 | + if i == segments - 1: |
| 695 | + end = int(duration) |
| 696 | + else: |
| 697 | + end = (i + 1) * 60 |
| 698 | + |
| 699 | + #print (f"Start: {start} End: {end}, Duration: {float(end) - float(start)} full duration: {duration}") |
| 700 | + anno = Annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation/{i}", motivation="painting", target=f"{c.id}#t={start},{end}") |
| 701 | + streamurl = f"https://{metadata['server']}{metadata['dir']}/{mp4File}?start={start}&end={end}&ignore=x.mp4&cnt=0" |
| 702 | + body = ResourceItem(id=streamurl, |
| 703 | + type='Video', |
| 704 | + format="video/mp4", |
| 705 | + label={"en": [f"Part {i + 1} of {segments}"]}, |
| 706 | + duration=end - start, |
| 707 | + height=int(filedata['height']), |
| 708 | + width=int(filedata['width']), |
| 709 | + ) |
| 710 | + |
| 711 | + anno.body = body |
| 712 | + ap.add_item(anno) |
| 713 | + |
730 | 714 | c.add_item(ap)
|
731 | 715 | manifest.add_item(c)
|
| 716 | + else: |
| 717 | + # create the canvases for each original |
| 718 | + for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']]: |
| 719 | + normalised_id = file['name'].rsplit(".", 1)[0] |
| 720 | + slugged_id = normalised_id.replace(" ", "-") |
| 721 | + c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas" |
| 722 | + c = Canvas(id=c_id, label=normalised_id, duration=float(file['length']), height=int(file['height']), width=int(file['width'])) |
| 723 | + |
| 724 | + # Add vtt if present |
| 725 | + if vttfiles and normalised_id in vttfiles: |
| 726 | + vttAPId = f"{URI_PRIFIX}/{identifier}/{slugged_id}/vtt" |
| 727 | + |
| 728 | + vttNo = 1 |
| 729 | + for vttFile in vttfiles[normalised_id]: |
| 730 | + vtAnno = c.make_annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation/vtt/{vttNo}", |
| 731 | + motivation="supplementing", |
| 732 | + target=c.id, |
| 733 | + anno_page_id=vttAPId, |
| 734 | + body={"id": f"{domain}resource/{identifier}/{vttFile['name']}", |
| 735 | + "type": "Text", |
| 736 | + "format": "text/vtt", |
| 737 | + }) |
| 738 | + # add label and language |
| 739 | + if vttFile['name'].endswith("autogenerated.vtt"): |
| 740 | + vtAnno.body.label = { 'en': ['autogenerated']} |
| 741 | + else: |
| 742 | + # Assume language |
| 743 | + splitName = vttFile['name'].split(".") |
| 744 | + lang = splitName[-2] |
| 745 | + vtAnno.body.add_label(lang, language="none") |
| 746 | + vtAnno.body.language = lang |
| 747 | + |
| 748 | + vttNo += 1 |
| 749 | + |
| 750 | + # create intermediary objects |
| 751 | + ap = AnnotationPage(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/page") |
| 752 | + anno = Annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation", motivation="painting", target=c.id) |
| 753 | + |
| 754 | + # create body based on whether there are derivatives or not: |
| 755 | + if file['name'] in derivatives: |
| 756 | + body = Choice(items=[]) |
| 757 | + # add the choices in order per https://github.com/ArchiveLabs/iiif.archivelab.org/issues/77#issuecomment-1499672734 |
| 758 | + for format in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']: |
| 759 | + if format in derivatives[file['name']]: |
| 760 | + r = ResourceItem(id=f"https://archive.org/download/{identifier}/{derivatives[file['name']][format]['name'].replace(' ', '%20')}", |
| 761 | + type='Video', |
| 762 | + format=to_mimetype(format), |
| 763 | + label={"none": [format]}, |
| 764 | + duration=float(file['length']), |
| 765 | + height=int(file['height']), |
| 766 | + width=int(file['width']), |
| 767 | + ) |
| 768 | + body.items.append(r) |
| 769 | + elif file['format'] == format: |
| 770 | + r = ResourceItem( |
| 771 | + id=f"https://archive.org/download/{identifier}/{file['name'].replace(' ', '%20')}", |
| 772 | + type='Video', |
| 773 | + format=to_mimetype(format), |
| 774 | + label={"none": [format]}, |
| 775 | + duration=float(file['length']), |
| 776 | + height=int(file['height']), |
| 777 | + width=int(file['width'])) |
| 778 | + body.items.append(r) |
| 779 | + else: |
| 780 | + # todo: deal with instances where there are no derivatives for whatever reason |
| 781 | + pass |
| 782 | + |
| 783 | + anno.body = body |
| 784 | + ap.add_item(anno) |
| 785 | + c.add_item(ap) |
| 786 | + manifest.add_item(c) |
732 | 787 | elif mediatype == "collection":
|
733 | 788 | raise IsCollection
|
734 | 789 | else:
|
@@ -785,6 +840,73 @@ def create_annotations(version, identifier, fileName, canvas_no, domain=None):
|
785 | 840 |
|
786 | 841 | return json.loads(annotationPage.jsonld())
|
787 | 842 |
|
def create_vtt_stream(identifier):
    """
    Build one WebVTT document from an item's time-windowed SRT captions.

    The item's metadata is fetched to find an ``.srt`` file and the length of
    the original ``.mpg``. The SRT is then requested in 60 second windows
    using URLs of the form
    https://archive.org/download/<identifier>/<name>.srt?t=<start>/<end>
    where start and end are in seconds, e.g.
    https://archive.org/download/CSPAN3_20180217_164800_Poplar_Forest_Archaeology/CSPAN3_20180217_164800_Poplar_Forest_Archaeology.cc5.srt?t=0/360

    Every cue timing in a window is shifted by that window's start offset and
    rewritten in WebVTT form (``HH:MM:SS.mmm --> HH:MM:SS.mmm``).
    NOTE(review): the shift assumes the timestamps returned for a window are
    relative to the window start - confirm against the streaming endpoint.

    :param identifier: archive.org item identifier
    :return: the WebVTT document as a string; only the ``WEBVTT`` header when
             no SRT file or no usable duration is found
    """

    metadata = requests.get('%s/metadata/%s' % (ARCHIVE, identifier)).json()
    filename = ""
    duration = 0.0
    for file in metadata['files']:
        if file['name'].endswith('.mpg') and file['source'] == 'original':
            duration = float(file['length'])
        # There seem to be multiple srt files but it is unclear how they
        # differ; the last one listed in the metadata is the one used.
        if file['name'].endswith('.srt'):
            filename = file['name']

    # Initialize the vtt content with the WEBVTT header
    vtt_content = ["WEBVTT\n"]

    # Without a caption file or a duration there is nothing to request
    if not filename or duration <= 0:
        return "\n".join(vtt_content)

    # One window per full minute, with the final window absorbing the
    # remainder. Media shorter than a minute still needs a single window.
    segments = max(1, math.floor(duration / 60))
    for i in range(segments):
        start = i * 60
        if i == segments - 1:
            # never let the final window collapse to zero length
            end = max(int(duration), start + 1)
        else:
            end = (i + 1) * 60

        response = requests.get(f"https://archive.org/download/{identifier}/{filename}?t={start}/{end}")

        # Best effort: a window that cannot be fetched is left out
        if response.status_code != 200:
            continue

        offset = timedelta(seconds=start)
        for line in response.text.splitlines():
            # Convert time format: 00:00:00,000 -> 00:00:00.000
            if "-->" in line:
                start_text, _, end_text = line.partition("-->")
                starttime = timeToDelta(start_text.strip()) + offset
                endtime = timeToDelta(end_text.strip()) + offset
                line = f"{formatTimeVTT(starttime)} --> {formatTimeVTT(endtime)}"

            vtt_content.append(line)

        # A truly empty line ends the last cue of this window
        vtt_content.append("")

    # Join the list into a single string
    return "\n".join(vtt_content)
| 893 | + |
def formatTimeVTT(time):
    """
    Render a timedelta as a WebVTT timestamp of the form HH:MM:SS.mmm
    """
    whole_hours, leftover = divmod(time.total_seconds(), 3600)
    whole_minutes, secs = divmod(leftover, 60)
    # the millisecond field comes from the timedelta's microseconds component
    millis = int(time.microseconds / 1000)
    return "{:02}:{:02}:{:02}.{:03}".format(
        int(whole_hours), int(whole_minutes), int(secs), millis
    )
| 898 | + |
def timeToDelta(time):
    """
    Convert an SRT formatted time (HH:MM:SS,mmm) to a timedelta.

    A dot is accepted in place of the comma, the millisecond part may be
    left out, and surrounding whitespace is ignored.

    :param time: timestamp text such as "00:01:02,500"
    :return: the equivalent timedelta
    :raises ValueError: if the text does not have three numeric clock fields
    """
    # Normalise the separator so one partition handles "," and "."
    clock, _, fraction = time.strip().replace(".", ",").partition(",")
    hour, minute, second = (int(part) for part in clock.split(":"))
    milliseconds = int(fraction) if fraction else 0
    return timedelta(hours=hour, minutes=minute, seconds=second, milliseconds=milliseconds)
| 909 | + |
788 | 910 | def coerce_list(value):
|
789 | 911 | if isinstance(value, list):
|
790 | 912 | return ". ".join(value)
|
|
0 commit comments