/
OS-World89f0fc5
import os
import tempfile
import xml.etree.ElementTree as ET
import zipfile
from typing import Dict
from desktop_env.evaluators.getters.file import get_vm_file
def get_background_image_in_slide(env, config: Dict[str, str]):
ppt_file_path, slide_index, dest = config["ppt_file_path"], int(config["slide_index"]), config["dest"]
image_id, image_file_path = None, None
ppt_file_localhost_path = get_vm_file(env, {"path": ppt_file_path, "dest": os.path.split(ppt_file_path)[-1]})
with zipfile.ZipFile(ppt_file_localhost_path, 'r') as myzip:
slide1_xml_file = 'ppt/slides/slide{}.xml'.format(slide_index + 1)
# firstly, check whether the background image is used in the slide
if slide1_xml_file not in myzip.namelist(): return None
with myzip.open(slide1_xml_file) as f:
# Parse the XML tree from the relationships file
tree = ET.parse(f)
root = tree.getroot()
bg_tag = "{http://schemas.openxmlformats.org/presentationml/2006/main}bgPr"
image_tag = "{http://schemas.openxmlformats.org/drawingml/2006/main}blip"
attr_tag = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed"
for child in root.iter(bg_tag):
try:
for element in child.iter(image_tag):
image_id = element.attrib[attr_tag]
break
except: pass
if image_id is not None: break
else: return None
# next, extract the background image from the slide
slide1_rels_file = 'ppt/slides/_rels/slide{}.xml.rels'.format(slide_index + 1)
if slide1_rels_file in myzip.namelist():
with myzip.open(slide1_rels_file) as f:
# Parse the XML tree from the relationships file
tree = ET.parse(f)
root = tree.getroot()
# Define the namespace used in the relationships file
namespaces = {'r': 'http://schemas.openxmlformats.org/package/2006/relationships'}
# Look for all relationship elements that have a type attribute for image
for rel in root.findall('r:Relationship', namespaces):
# Check if the relationship is for an image file
if 'image' in rel.attrib['Type'] and rel.attrib['Id'] == image_id:
target = rel.attrib['Target']
if target.startswith('..'):
# Resolve the relative path to get the correct path within the zip file
image_file_path = os.path.normpath(os.path.join('ppt/slides', target))
# Replace backslashes with forward slashes for ZIP compatibility
image_file_path = image_file_path.replace('\\', '/')
tmpdirname = os.path.dirname(ppt_file_localhost_path)
myzip.extract(image_file_path, tmpdirname)
image_file_path = os.path.join(tmpdirname, image_file_path)
return image_file_path
else: # absolute path
assert target.startswith("file://"), target
image_file_path = target[7:]
break
if image_file_path is None:
return None
else:
# Get the audio file from vm and return the file path in the host
return get_vm_file(env, {"path": image_file_path, "dest": dest})
def get_audio_in_slide(env, config: Dict[str, str]):
ppt_file_path, slide_index, dest = config["ppt_file_path"], int(config["slide_index"]), config["dest"]
# Open the .pptx file as a zip file, fixme: now we assume there is only one audio file in the slides
audio_file_path = None
ppt_file_localhost_path = get_vm_file(env, {"path": ppt_file_path, "dest": os.path.split(ppt_file_path)[-1]})
with zipfile.ZipFile(ppt_file_localhost_path, 'r') as myzip:
# Find the relationships XML file for the first slide
slide1_rels_file = 'ppt/slides/_rels/slide{}.xml.rels'.format(slide_index + 1)
if slide1_rels_file in myzip.namelist():
with myzip.open(slide1_rels_file) as f:
# Parse the XML tree from the relationships file
tree = ET.parse(f)
root = tree.getroot()
# Define the namespace used in the relationships file
namespaces = {'r': 'http://schemas.openxmlformats.org/package/2006/relationships'}
# Look for all relationship elements that have a type attribute for audio
for rel in root.findall('r:Relationship', namespaces):
# Check if the relationship is for an audio file
if 'audio' in rel.attrib['Type']:
# The audio can be embedded inside the file or linked to an external file
# Get the target attribute which contains the audio file path
target = rel.attrib['Target']
if target.startswith('..'):
# Resolve the relative path to get the correct path within the zip file
audio_file_path = os.path.normpath(os.path.join('ppt/slides', target))
# Replace backslashes with forward slashes for ZIP compatibility
audio_file_path = audio_file_path.replace('\\', '/')
# Create a temporary directory to extract the audio file
tmpdirname = os.path.dirname(ppt_file_localhost_path)
myzip.extract(audio_file_path, tmpdirname)
audio_file_path = os.path.join(tmpdirname, audio_file_path)
return audio_file_path
# with tempfile.TemporaryDirectory() as tmpdirname:
# # Extract the audio file
# myzip.extract(audio_file_path, tmpdirname)
# # Get the full path of the extracted audio file
# extracted_audio_path = os.path.join(tmpdirname, audio_file_path)
# # Return the extracted audio file path
# audio_file_path = extracted_audio_path
else:
# the audio file is external to the .pptx file
# Return the audio file path
assert target.startswith("file://"), target
audio_file_path = target[7:]
break
if audio_file_path is None:
return None
else:
# Get the audio file from vm and return the file path in the host
return get_vm_file(env, {"path": audio_file_path, "dest": dest})