Coverage for /home/runner/work/viur-core/viur-core/viur/src/viur/core/modules/file.py: 0%

757 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-02-07 19:28 +0000

1import base64 

2import datetime 

3import hashlib 

4import hmac 

5import html 

6import io 

7import json 

8import logging 

9import re 

10import string 

11import typing as t 

12import warnings 

13from collections import namedtuple 

14from urllib.parse import quote as urlquote, urlencode 

15from urllib.request import urlopen 

16 

17import PIL 

18import PIL.ImageCms 

19import google.auth 

20import requests 

21from PIL import Image 

22from google.appengine.api import blobstore, images 

23from google.cloud import storage 

24from google.oauth2.service_account import Credentials as ServiceAccountCredentials 

25 

26from viur.core import conf, current, db, errors, utils 

27from viur.core.bones import BaseBone, BooleanBone, KeyBone, NumericBone, StringBone 

28from viur.core.decorators import * 

29from viur.core.i18n import LanguageWrapper 

30from viur.core.prototypes.tree import SkelType, Tree, TreeSkel 

31from viur.core.skeleton import SkeletonInstance, skeletonByKind 

32from viur.core.tasks import CallDeferred, DeleteEntitiesIter, PeriodicTask 

33 

# Module-level state: filename validation and Google Cloud Storage handles

# A filename is accepted when it
#   - is not a reserved Windows device name (CON, PRN, AUX, NUL, COM1-9, LPT1-9),
#   - contains neither control characters (0x00-0x1F) nor any of < > : " / \ | ? *
#   - and does not end with a dot or a space.
VALID_FILENAME_REGEX = re.compile(
    r"^(?!^(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$)[^\x00-\x1F<>:\"\/\\|?*]*[^\x00-\x1F<>:\"\/\\|?*. ]$",
    flags=re.IGNORECASE,
)

# Default credentials and project id as reported by google.auth
_CREDENTIALS, _PROJECT_ID = google.auth.default()
GOOGLE_STORAGE_CLIENT = storage.Client(_PROJECT_ID, _CREDENTIALS)

PRIVATE_BUCKET_NAME = f"{_PROJECT_ID}.appspot.com"
PUBLIC_BUCKET_NAME = f"public-dot-{_PROJECT_ID}"
PUBLIC_DLKEY_SUFFIX = "_pub"  # download keys ending with this suffix are served from the public bucket

_private_bucket = GOOGLE_STORAGE_CLIENT.lookup_bucket(PRIVATE_BUCKET_NAME)
_public_bucket = None  # resolved on first use

# FilePath describes the components of a ViUR file
FilePath = namedtuple("FilePath", ["dlkey", "is_derived", "filename"])

54 

55 

def importBlobFromViur2(dlKey, fileName):
    """
    Import a blob of a former ViUR 2 project into the bucket layout used by this module.

    The outcome of every attempt is persisted in a ``viur-viur2-blobimport`` entity keyed by ``dlKey``.
    If such an entity already exists, the function answers from it and does not try again.

    :param dlKey: Download key of the blob to import.
    :param fileName: Filename the blob is stored under (``<dlKey>/source/<fileName>``) after the import.
    :return: The download URL of the imported blob, or False if the import is not configured or failed.
    """
    bucket = File.get_bucket(dlKey)
    blobsource = conf.viur2import_blobsource

    if not blobsource:
        return False

    def new_marker(success):
        # One marker entity per dlKey records whether the import succeeded
        marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
        marker["success"] = success
        return marker

    def fail(error, **details):
        # Persist the reason of the failure, so the import is not retried on every call
        marker = new_marker(False)
        marker["error"] = error
        for name, value in details.items():
            marker[name] = value
        db.Put(marker)
        return False

    existing_import = db.Get(db.Key("viur-viur2-blobimport", dlKey))
    if existing_import:
        return existing_import["dlurl"] if existing_import["success"] else False

    if blobsource["infoURL"]:
        try:
            response = urlopen(blobsource["infoURL"] + dlKey)
        except Exception:
            logging.exception(f"Fetching ViUR 2 blob info for {dlKey!r} failed")
            return fail("Failed URL-FETCH 1")

        # Use the response as a context manager, so the connection is closed in any case
        with response:
            if response.status != 200:
                return fail("Failed URL-FETCH 2")

            import_data = json.loads(response.read())

        old_blob_name = blobsource["gsdir"] + "/" + import_data["key"]
    else:
        old_blob_name = blobsource["gsdir"] + "/" + dlKey

    src_blob = storage.Blob(bucket=bucket, name=old_blob_name)

    if not src_blob.exists():
        return fail("Local SRC-Blob missing", oldBlobName=old_blob_name)

    bucket.rename_blob(src_blob, f"{dlKey}/source/{fileName}")

    marker = new_marker(True)
    marker["old_src_key"] = dlKey
    marker["old_src_name"] = fileName
    marker["dlurl"] = File.create_download_url(dlKey, fileName, False, None)
    db.Put(marker)
    return marker["dlurl"]

103 

104 

def thumbnailer(fileSkel, existingFiles, params):
    """
    Render thumbnails of an image file and store them as derived files next to the source blob.

    :param fileSkel: File skeleton (or dict) providing at least "name" and "dlkey".
    :param existingFiles: Not used by this implementation.
    :param params: Iterable of dicts describing the wanted sizes. Each dict needs a "width" and may provide
        "height", "fileExtension" (default "webp") and "mimeType" (default "image/webp").
        Entries without "width" are ignored.
    :return: A list of tuples ``(name, size, mimetype, custom_data)``, one per thumbnail written.
        An empty list if the source is not an image that can be identified, None if the source blob is missing.
    """
    source_name = html.unescape(fileSkel["name"])
    bucket = File.get_bucket(fileSkel["dlkey"])
    source_blob = bucket.get_blob(f"""{fileSkel["dlkey"]}/source/{source_name}""")

    if not source_blob:
        logging.warning(f"""Blob {fileSkel["dlkey"]}/source/{source_name} is missing from cloud storage!""")
        return

    source_data = io.BytesIO()
    source_blob.download_to_file(source_data)

    results = []

    for spec in params:
        # The image is opened again from the downloaded bytes for every requested size
        source_data.seek(0)
        target_data = io.BytesIO()

        try:
            img = PIL.Image.open(source_data)
        except PIL.Image.UnidentifiedImageError:
            # The source cannot be loaded at all, so no other resolution will work either
            return []

        if icc_profile := img.info.get('icc_profile'):
            # The embedded colour profile is not carried over into the thumbnail,
            # therefore the pixels are converted to sRGB first.
            source_profile = PIL.ImageCms.ImageCmsProfile(io.BytesIO(icc_profile))
            target_profile = PIL.ImageCms.createProfile('sRGB')

            try:
                img = PIL.ImageCms.profileToProfile(
                    img,
                    inputProfile=source_profile,
                    outputProfile=target_profile,
                    outputMode="RGBA" if img.has_transparency_data else "RGB",
                )
            except Exception as e:
                logging.exception(e)
                continue

        extension = spec.get("fileExtension", "webp")

        if "width" not in spec:
            # Neither a fixed size nor a width was requested; there is no default to fall back to
            continue

        width = spec["width"]

        if "height" in spec:
            height = spec["height"]
            target_name = f"thumbnail-{width}-{height}.{extension}"
        else:
            # Only the width is given: scale the height proportionally
            height = int(float(img.size[1]) * float(width / float(img.size[0])))
            target_name = f"thumbnail-w{width}.{extension}"

        mimetype = spec.get("mimeType", "image/webp")

        img = img.resize((width, height), PIL.Image.LANCZOS)
        img.save(target_data, extension)

        target_size = target_data.tell()
        target_data.seek(0)

        target_blob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{target_name}""")
        target_blob.upload_from_file(target_data, content_type=mimetype)

        results.append(
            (target_name, target_size, mimetype, {"mimetype": mimetype, "width": width, "height": height})
        )

    return results

158 

159 

def cloudfunction_thumbnailer(fileSkel, existingFiles, params):
    """External thumbnailer for images.

    The rendering is delegated to a cloud function, which can be found here:
    https://github.com/viur-framework/viur-cloudfunctions/tree/main/thumbnailer

    The cloud function is called twice: first to learn the names and mime-types of the files it is going to
    create, then - after a resumable upload session has been opened for each of them - to render and upload.

    Usage, main.py:

    .. code-block:: python

        from viur.core.modules.file import cloudfunction_thumbnailer

        conf.file_thumbnailer_url = "https://xxxxx.cloudfunctions.net/imagerenderer"
        conf.file_derivations = {"thumbnail": cloudfunction_thumbnailer}

        conf.derives_pdf = {
            "thumbnail": [{"width": 1920,"sites":"1,2"}]
        }

    skeletons/xxx.py:

    .. code-block:: python

        test = FileBone(derive=conf.derives_pdf)

    :param fileSkel: File skeleton (or dict) providing "dlkey", "name" and "mimetype".
    :param existingFiles: Not used by this implementation.
    :param params: Derivation parameters, passed through to the cloud function unchanged.
    :return: A list of tuples ``(name, size, mimetype, customData)``; None if a required step failed.
    :raises ValueError: If ``conf.file_thumbnailer_url`` is not configured.
    """
    if not conf.file_thumbnailer_url:
        raise ValueError("conf.file_thumbnailer_url is not set")

    bucket = File.get_bucket(fileSkel["dlkey"])
    file_name = html.unescape(fileSkel["name"])

    def sign_source_url():
        # Returns an URL the cloud function can fetch the source file from, or None if the blob is missing
        if conf.instance.is_dev_server:
            return File.create_download_url(fileSkel["dlkey"], fileSkel["name"])

        blob_path = f"""{fileSkel["dlkey"]}/source/{file_name}"""
        source_blob = bucket.get_blob(blob_path)

        if not source_blob:
            logging.warning(f"Blob {blob_path} is missing from cloud storage!")
            return None

        auth_request = google.auth.transport.requests.Request()
        expires_at = datetime.datetime.now() + datetime.timedelta(seconds=60)
        signing_credentials = google.auth.compute_engine.IDTokenCredentials(auth_request, "")

        return source_blob.generate_signed_url(
            expires_at,
            credentials=signing_credentials,
            response_disposition=f"""filename={fileSkel["name"]}""",
            version="v4",
        )

    def call_cloudfunction(payload):
        # Posts the signed payload to the cloud function; returns the decoded JSON answer or None on any error
        data_str = base64.b64encode(json.dumps(payload).encode("UTF-8"))
        sig = File.hmac_sign(data_str)
        body = json.dumps({"dataStr": data_str.decode('ASCII'), "sign": sig})

        resp = requests.post(
            conf.file_thumbnailer_url,
            data=body,
            headers={"Content-Type": "application/json"},
            allow_redirects=False,
        )

        status = resp.status_code
        if status != 200:
            if status in (302, 404):
                # Google answers with a 302 redirect to a login page when the cloud function does not exist, see
                # https://cloud.google.com/functions/docs/troubleshooting#login
                logging.error("Cloudfunction not found")
            elif status == 403:
                logging.error("No permission for the Cloudfunction")
            else:
                logging.error(
                    f"cloudfunction_thumbnailer failed with code: {resp.status_code} and data: {resp.content}")

            return None

        try:
            response_data = resp.json()
        except Exception as e:
            logging.error(f"response could not be converted in json failed with: {e=}")
            return None

        if "error" in response_data:
            logging.error(f"cloudfunction_thumbnailer failed with: {response_data.get('error')}")
            return None

        return response_data

    # First pass: only ask for the names of the files which are going to be derived
    if not (url := sign_source_url()):
        return

    request_data = {
        "url": url,
        "name": fileSkel["name"],
        "params": params,
        "minetype": fileSkel["mimetype"],
        "baseUrl": current.request.get().request.host_url.lower(),
        "targetKey": fileSkel["dlkey"],
        "nameOnly": True
    }

    if not (announced := call_cloudfunction(request_data)):
        return

    # Open one resumable upload session per announced file
    upload_urls = {}
    for entry in announced["values"]:
        derived_name = File.sanitize_filename(entry["name"])
        derived_blob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{derived_name}""")
        upload_urls[fileSkel["dlkey"] + derived_name] = derived_blob.create_resumable_upload_session(
            timeout=60,
            content_type=entry["mimeType"],
        )

    # Second pass: a fresh source URL is signed, then the cloud function renders and uploads
    if not (url := sign_source_url()):
        return

    request_data["url"] = url
    request_data["nameOnly"] = False
    request_data["uploadUrls"] = upload_urls

    if not (rendered := call_cloudfunction(request_data)):
        return

    reslist = []
    try:
        for derived in rendered["values"]:
            for key, value in derived.items():
                reslist.append((key, value["size"], value["mimetype"], value["customData"]))
    except Exception as e:
        # Entries collected before the malformed one are still returned
        logging.error(f"cloudfunction_thumbnailer failed with: {e=}")

    return reslist

283 

284 

class DownloadUrlBone(BaseBone):
    """
    Bone that provides a freshly signed download URL each time a FileSkel is unserialized.
    """

    def unserialize(self, skel, name):
        """
        Compute the download URL from the skeleton's "dlkey" and "name".

        :param skel: The skeleton being unserialized.
        :param name: Name of this bone inside the skeleton.
        :return: True if a URL was written to ``skel.accessedValues``, False if the database entity
            lacks "dlkey" or "name".
        """
        if "dlkey" not in skel.dbEntity or "name" not in skel.dbEntity:
            return False

        skel.accessedValues[name] = File.create_download_url(
            skel["dlkey"],
            skel["name"],
            expires=conf.render_json_download_url_expiration,
        )
        return True

298 

299 

class FileLeafSkel(TreeSkel):
    """
    Skeleton describing a single file (leaf) of the file module.
    """
    kindName = "file"

    size = StringBone(
        descr="Size",
        readOnly=True,
        searchable=True,
    )

    dlkey = StringBone(
        descr="Download-Key",
        readOnly=True,
    )

    name = StringBone(
        descr="Filename",
        caseSensitive=False,
        searchable=True,
        vfunc=lambda val: None if File.is_valid_filename(val) else "Invalid filename provided",
    )

    mimetype = StringBone(
        descr="MIME-Type",
        readOnly=True,
    )

    weak = BooleanBone(
        descr="Weak reference",
        readOnly=True,
        visible=False,
    )

    pending = BooleanBone(
        descr="Pending upload",
        readOnly=True,
        visible=False,
        defaultValue=False,
    )

    width = NumericBone(
        descr="Width",
        readOnly=True,
        searchable=True,
    )

    height = NumericBone(
        descr="Height",
        readOnly=True,
        searchable=True,
    )

    downloadUrl = DownloadUrlBone(
        descr="Download-URL",
        readOnly=True,
        visible=False,
    )

    derived = BaseBone(
        descr="Derived Files",
        readOnly=True,
        visible=False,
    )

    pendingparententry = KeyBone(
        descr="Pending key Reference",
        readOnly=True,
        visible=False,
    )

    crc32c_checksum = StringBone(
        descr="CRC32C checksum",
        readOnly=True,
    )

    md5_checksum = StringBone(
        descr="MD5 checksum",
        readOnly=True,
    )

    public = BooleanBone(
        descr="Public File",
        readOnly=True,
        defaultValue=False,
    )

    serving_url = StringBone(
        descr="Serving-URL",
        readOnly=True,
        params={
            "tooltip": "The 'serving_url' is only available in public file repositories.",
        }
    )

    @classmethod
    def _inject_serving_url(cls, skel: SkeletonInstance) -> None:
        """
        Set "serving_url" on public image files which do not have one yet.

        Nothing happens for non-public files, non-image mime-types, files that already carry a serving URL,
        or on the local development server. Failures are logged and not raised.
        """
        if not skel["public"]:
            return

        if not skel["mimetype"] or not skel["mimetype"].startswith("image/"):
            return

        if skel["serving_url"]:
            return

        bucket = File.get_bucket(skel["dlkey"])
        filename = f"/gs/{bucket.name}/{skel['dlkey']}/source/{skel['name']}"

        if conf.instance.is_dev_server:
            # On the local development server, the call below would raise a
            # `google.appengine.runtime.apiproxy_errors.RPCFailedError`
            logging.warning(f"Can't inject serving_url for {filename!r} on local development server")
            return

        try:
            skel["serving_url"] = images.get_serving_url(None, secure_url=True, filename=filename)
        except Exception as e:
            logging.warning(f"Failed to create serving_url for {filename!r} with exception {e!r}")
            logging.exception(e)

    def preProcessBlobLocks(self, locks):
        """
        Make sure the own dlkey is part of the locks, even though this skeleton holds no FileBone.

        Weak files and files without a dlkey do not add a lock.
        """
        if self["dlkey"] and not self["weak"]:
            locks.add(self["dlkey"])

        return locks

    @classmethod
    def refresh(cls, skel):
        """
        Refresh the skeleton; if configured, import the blob from ViUR 2 and inject the serving URL.
        """
        super().refresh(skel)

        if conf.viur2import_blobsource and (imported_url := importBlobFromViur2(skel["dlkey"], skel["name"])):
            if not skel["downloadUrl"]:
                skel["downloadUrl"] = imported_url

            skel["pendingparententry"] = None

        cls._inject_serving_url(skel)

    @classmethod
    def write(cls, skel, **kwargs):
        """
        Inject the serving URL (where applicable), then write the skeleton.
        """
        cls._inject_serving_url(skel)
        return super().write(skel, **kwargs)

445 

446 

class FileNodeSkel(TreeSkel):
    """
    Skeleton describing a folder (node) of the file module.
    """
    kindName = "file_rootNode"  # FIXME: VIUR4, don't use "_rootNode" kindname

    name = StringBone(
        descr="Name",
        required=True,
        searchable=True,
    )

    rootNode = BooleanBone(
        descr="Is RootNode",
        defaultValue=False,
        readOnly=True,
        visible=False,
    )

    public = BooleanBone(
        descr="Is public?",
        defaultValue=False,
        readOnly=True,
        visible=False,
    )

    # Explicitly set to None on this skeleton
    viurCurrentSeoKeys = None

474 

475 

476class File(Tree): 

477 PENDING_POSTFIX = " (pending)" 

478 DOWNLOAD_URL_PREFIX = "/file/download/" 

479 INTERNAL_SERVING_URL_PREFIX = "/file/serve/" 

480 MAX_FILENAME_LEN = 256 

481 IMAGE_META_MAX_SIZE: t.Final[int] = 10 * 1024 ** 2 

482 """Maximum size of image files that should be analysed in :meth:`set_image_meta`. 

483 Default: 10 MiB""" 

484 

485 leafSkelCls = FileLeafSkel 

486 nodeSkelCls = FileNodeSkel 

487 

488 handler = "tree.simple.file" 

489 adminInfo = { 

490 "icon": "folder-fill", 

491 "handler": handler, # fixme: Use static handler; Remove with VIUR4! 

492 } 

493 

494 roles = { 

495 "*": "view", 

496 "editor": ("add", "edit"), 

497 "admin": "*", 

498 } 

499 

500 default_order = "name" 

501 

502 # Helper functions currently reside here

503 

@staticmethod
def get_bucket(dlkey: str) -> google.cloud.storage.bucket.Bucket:
    """
    Return the Google Cloud Storage bucket responsible for the given dlkey.

    Download keys ending with PUBLIC_DLKEY_SUFFIX belong to the public bucket, which is looked up on
    first use and then kept in a module-level variable. Any other key (including an empty one) belongs
    to the private bucket.

    :param dlkey: The download key of the file.
    :raises ValueError: If the public bucket is required but does not exist.
    """
    global _public_bucket

    if not (dlkey and dlkey.endswith(PUBLIC_DLKEY_SUFFIX)):
        return _private_bucket

    if not _public_bucket:
        _public_bucket = GOOGLE_STORAGE_CLIENT.lookup_bucket(PUBLIC_BUCKET_NAME)

    if _public_bucket:
        return _public_bucket

    raise ValueError(
        f"""The bucket '{PUBLIC_BUCKET_NAME}' does not exist! Please create it with ACL access."""
    )

519 

@staticmethod
def is_valid_filename(filename: str) -> bool:
    """
    Check whether a filename is acceptable.

    A valid filename works on Linux, Mac OS and Windows alike and has at most MAX_FILENAME_LEN characters.

    Rule set: https://stackoverflow.com/a/31976060/3749896
    Regex test: https://regex101.com/r/iBYpoC/1

    :param filename: The filename to check.
    :return: True if the filename is valid, False otherwise.
    """
    return len(filename) <= File.MAX_FILENAME_LEN and bool(re.match(VALID_FILENAME_REGEX, filename))

535 

@staticmethod
def hmac_sign(data: t.Any) -> str:
    """
    Sign data with the configured ``conf.file_hmac_key`` using HMAC-SHA3-384.

    :param data: The data to sign. Anything which is not bytes is converted with ``str()`` and UTF-8 encoded.
    :return: The signature as hex digest.
    """
    assert conf.file_hmac_key is not None, "No hmac-key set!"

    payload = data if isinstance(data, bytes) else str(data).encode("UTF-8")
    mac = hmac.new(conf.file_hmac_key, msg=payload, digestmod=hashlib.sha3_384)
    return mac.hexdigest()

542 

543 @staticmethod 

544 def hmac_verify(data: t.Any, signature: str) -> bool: 

545 return hmac.compare_digest(File.hmac_sign(data.encode("ASCII")), signature) 

546 

@staticmethod
def create_internal_serving_url(
    serving_url: str,
    size: int = 0,
    filename: str = "",
    options: str = "",
    download: bool = False
) -> str:
    """
    Turn a Google serving URL into an internal serving URL (endpoint: /file/serve).

    Requests to the internal URL are routed through this application instead of going to Google directly,
    which can be a legal requirement, e.g. because of the GDPR.

    :param serving_url: The original serving URL, as generated by FileLeafSkel._inject_serving_url()
    :param size: Optional size setting
    :param filename: Optional filename setting
    :param options: Additional options, passed through to /file/serve
    :param download: Download flag, passed through to /file/serve
    :return: The internal serving URL. Parameters with a falsy value are left out of the query string.
    :raises ValueError: If serving_url is not a https://*.googleusercontent.com/ URL.
    """
    # Host prefix and path are the two components the serve function needs
    components = re.match(r"^https:\/\/(.*?)\.googleusercontent\.com\/(.*?)$", serving_url)

    if not components:
        raise ValueError(f"Invalid {serving_url=!r} provided")

    internal_url = File.INTERNAL_SERVING_URL_PREFIX + "/".join(components.groups())

    # Only parameters which are actually set become part of the query string
    query = {}
    for param, value in (
        ("download", download),
        ("filename", filename),
        ("options", options),
        ("size", size),
    ):
        if value:
            query[param] = value

    if query:
        internal_url += f"?{urlencode(query)}"

    return internal_url

592 

@staticmethod
def create_download_url(
    dlkey: str,
    filename: str,
    derived: bool = False,
    expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1),
    download_filename: t.Optional[str] = None
) -> str:
    """
    Create a signed download URL for the given dlkey/filename combination.

    :param dlkey: The download key of the file (= its folder in the bucket).
    :param filename: The name of the file. Either the original filename or the name of a derived file.
    :param derived: True if it points to a derived file, False if it points to the originally uploaded file.
    :param expires:
        None (or any other falsy value) if the link should never expire, otherwise a datetime.timedelta
        of how long the link should be valid. An int is interpreted as a number of minutes.
    :param download_filename: If set, the browser is forced to download the blob under this alternate
        filename.
    :return: The signed download URL relative to the current domain (e.g. /file/download/...)
    :raises errors.UnprocessableEntity: If download_filename is not a valid filename.
    """
    if isinstance(expires, int):
        expires = datetime.timedelta(minutes=expires)

    # Filenames may arrive with ( ) = escaped as HTML entities; undo that
    for entity, char in (("&#040;", "("), ("&#041;", ")"), ("&#061;", "=")):
        filename = filename.replace(entity, char)

    subfolder = "derived" if derived else "source"
    filepath = f"""{dlkey}/{subfolder}/{filename}"""

    if download_filename:
        if not File.is_valid_filename(download_filename):
            raise errors.UnprocessableEntity(f"Invalid download_filename {download_filename!r} provided")

        download_filename = urlquote(download_filename)

    # "0" marks a link without expiration
    if expires:
        valid_until = (datetime.datetime.now() + expires).strftime("%Y%m%d%H%M")
    else:
        valid_until = 0

    payload = "\0".join((filepath, str(valid_until), download_filename or ""))
    data = base64.urlsafe_b64encode(payload.encode("UTF-8"))
    sig = File.hmac_sign(data)

    return f"""{File.DOWNLOAD_URL_PREFIX}{data.decode("ASCII")}?sig={sig}"""

633 

634 @staticmethod 

635 def parse_download_url(url) -> t.Optional[FilePath]: 

636 """ 

637 Parses a file download URL in the format `/file/download/xxxx?sig=yyyy` into its FilePath. 

638 

639 If the URL cannot be parsed, the function returns None. 

640 

641 :param url: The file download URL to be parsed. 

642 :return: A FilePath on success, None otherwise. 

643 """ 

644 if not url.startswith(File.DOWNLOAD_URL_PREFIX) or "?" not in url: 

645 return None 

646 

647 data, sig = url.removeprefix(File.DOWNLOAD_URL_PREFIX).split("?", 1) # Strip "/file/download/" and split on "?" 

648 sig = sig.removeprefix("sig=") 

649 

650 if not File.hmac_verify(data, sig): 

651 # Invalid signature 

652 return None 

653 

654 # Split the blobKey into the individual fields it should contain 

655 data = base64.urlsafe_b64decode(data).decode("UTF-8") 

656 

657 match data.count("\0"): 

658 case 2: 

659 dlpath, valid_until, _ = data.split("\0") 

660 case 1: 

661 # It's the old format, without an downloadFileName 

662 dlpath, valid_until = data.split("\0") 

663 case _: 

664 # Invalid path 

665 return None 

666 

667 if valid_until != "0" and datetime.strptime(valid_until, "%Y%m%d%H%M") < datetime.now(): 

668 # Signature expired 

669 return None 

670 

671 if dlpath.count("/") != 2: 

672 # Invalid path 

673 return None 

674 

675 dlkey, derived, filename = dlpath.split("/") 

676 return FilePath(dlkey, derived != "source", filename) 

677 

678 @staticmethod 

679 def create_src_set( 

680 file: t.Union["SkeletonInstance", dict, str], 

681 expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1), 

682 width: t.Optional[int] = None, 

683 height: t.Optional[int] = None, 

684 language: t.Optional[str] = None, 

685 ) -> str: 

686 """ 

687 Generates a string suitable for use as the srcset tag in html. This functionality provides the browser 

688 with a list of images in different sizes and allows it to choose the smallest file that will fill it's 

689 viewport without upscaling. 

690 

691 :param file: The file skeleton (or if multiple=True a single value from it) to generate the srcset. 

692 :param expires: 

693 None if the file is supposed to be public (which causes it to be cached on the google edecaches), 

694 otherwise it's lifetime in seconds 

695 :param width: 

696 A list of widths that should be included in the srcset. 

697 If a given width is not available, it will be skipped. 

698 :param height: A list of heights that should be included in the srcset. If a given height is not available, 

699 it will be skipped. 

700 :param language: Language overwrite if file has multiple languages, and we want to explicitly specify one 

701 :return: The srctag generated or an empty string if a invalid file object was supplied 

702 """ 

703 if not width and not height: 

704 logging.error("Neither width or height supplied") 

705 return "" 

706 

707 if isinstance(file, str): 

708 file = db.Query("file").filter("dlkey =", file).order(("creationdate", db.SortOrder.Ascending)).getEntry() 

709 

710 if not file: 

711 return "" 

712 

713 if isinstance(file, LanguageWrapper): 

714 language = language or current.language.get() 

715 if not language or not (file := file.get(language)): 

716 return "" 

717 

718 if "dlkey" not in file and "dest" in file: 

719 file = file["dest"] 

720 

721 from viur.core.skeleton import SkeletonInstance # avoid circular imports 

722 

723 if not ( 

724 isinstance(file, (SkeletonInstance, dict)) 

725 and "dlkey" in file 

726 and "derived" in file 

727 ): 

728 logging.error("Invalid file supplied") 

729 return "" 

730 

731 if not isinstance(file["derived"], dict): 

732 logging.error("No derives available") 

733 return "" 

734 

735 src_set = [] 

736 for filename, derivate in file["derived"]["files"].items(): 

737 customData = derivate.get("customData", {}) 

738 

739 if width and customData.get("width") in width: 

740 src_set.append( 

741 f"""{File.create_download_url(file["dlkey"], filename, True, expires)} {customData["width"]}w""" 

742 ) 

743 

744 if height and customData.get("height") in height: 

745 src_set.append( 

746 f"""{File.create_download_url(file["dlkey"], filename, True, expires)} {customData["height"]}h""" 

747 ) 

748 

749 return ", ".join(src_set) 

750 

def write(
    self,
    filename: str,
    content: t.Any,
    mimetype: str = "text/plain",
    width: int = None,
    height: int = None,
    public: bool = False,
) -> db.Key:
    """
    Store the content of a buffer as a new file in the file module.

    The content is uploaded to the bucket first; afterwards a weak leaf skeleton describing it is written.

    :param filename: Filename to be written.
    :param content: The file content to be written, as bytes-like object.
    :param mimetype: The file's mimetype.
    :param width: Optional width information for the file.
    :param height: Optional height information for the file.
    :param public: True if the file should be publicly accessible.
    :return: The key of the file object written. This can be associated e.g. with a FileBone.
    :raises ValueError: If the filename is invalid.
    """
    if not File.is_valid_filename(filename):
        raise ValueError(f"{filename=} is invalid")

    # The suffix marks the file as public and selects the public bucket
    dl_key = utils.string.random() + (PUBLIC_DLKEY_SUFFIX if public else "")

    blob = File.get_bucket(dl_key).blob(f"{dl_key}/source/{filename}")
    blob.upload_from_file(io.BytesIO(content), content_type=mimetype)

    skel = self.addSkel("leaf")

    skel["name"] = filename
    skel["size"] = blob.size
    skel["mimetype"] = mimetype
    skel["dlkey"] = dl_key
    skel["weak"] = True
    skel["public"] = public
    skel["width"] = width
    skel["height"] = height

    # Cloud Storage reports its checksums base64 encoded; they are stored as hex strings
    skel["crc32c_checksum"] = base64.b64decode(blob.crc32c).hex()
    skel["md5_checksum"] = base64.b64decode(blob.md5_hash).hex()

    skel.write()
    return skel["key"]

799 

def read(
    self,
    key: db.Key | int | str | None = None,
    path: str | None = None,
) -> tuple[io.BytesIO, str]:
    """
    Read a file from the Cloud Storage.

    If a key and a path are provided, the key is preferred.
    This means that the entry in the db is searched first and if this is not found, the path is used.

    :param key: Key of the LeafSkel that contains the "dlkey" and the "name".
    :param path: The path of the file in the Cloud Storage Bucket.

    :return: Returns the file as a io.BytesIO buffer and the content-type
    :raises ValueError: If neither key nor path is given, or the key is unknown and no path is given.
    """
    if not key and not path:
        raise ValueError("Please provide a key or a path")

    dlkey = None

    if key:
        skel = self.viewSkel("leaf")
        if skel.read(db.keyHelper(key, skel.kindName)):
            dlkey = skel["dlkey"]
            path = f"""{dlkey}/source/{skel["name"]}"""
        elif not path:
            raise ValueError("This skeleton is not in the database!")

    if dlkey is None:
        # Either no key was given or it could not be read: the bucket has to be chosen from the path,
        # whose first part is the dlkey plus eventual postfix
        dlkey = path.split("/", 1)[0]

    blob = File.get_bucket(dlkey).blob(path)
    return io.BytesIO(blob.download_as_bytes()), blob.content_type

833 

@CallDeferred
def deleteRecursive(self, parentKey):
    """
    Delete all files and folders below the given node, descending into sub-folders.

    Each file's dlkey is marked for deletion before its skeleton is deleted.

    :param parentKey: Key of the node whose content is deleted.
    """
    files = db.Query(self.leafSkelCls().kindName).filter("parentdir =", parentKey).iter()
    for fileEntry in files:
        self.mark_for_deletion(fileEntry["dlkey"])
        skel = self.leafSkelCls()

        # The key of an entity is an attribute (see the node loop below), not a method
        if skel.read(str(fileEntry.key)):
            skel.delete()

    dirs = db.Query(self.nodeSkelCls().kindName).filter("parentdir", parentKey).iter()
    for d in dirs:
        # Clear the folder's content first, then remove the folder itself
        self.deleteRecursive(d.key)
        skel = self.nodeSkelCls()
        if skel.read(d.key):
            skel.delete()

849 

@exposed
@skey
def getUploadURL(
    self,
    fileName: str,
    mimeType: str,
    size: t.Optional[int] = None,
    node: t.Optional[str | db.Key] = None,
    authData: t.Optional[str] = None,
    authSig: t.Optional[str] = None,
    public: bool = False,
):
    """
    Open a resumable upload session and create the pending file entry belonging to it.

    The upload is authorized either by signed ``authData`` (together with ``authSig``), which may restrict
    mime-types and size and determines the target node, or by the add-permission of the current user.

    :param fileName: Name of the file which is going to be uploaded.
    :param mimeType: Mime-type of the file which is going to be uploaded.
    :param size: Size of the file in bytes. Required if the signed authData defines a maximum size.
    :param node: Key of the node the file should be added to (ignored if authData is used).
    :param authData: Base64 encoded JSON with "validUntil", "validMimeTypes", "node" and "maxSize".
    :param authSig: HMAC signature of authData.
    :param public: True if the file is uploaded into the public bucket.
    :return: The rendered "uploadKey" and "uploadUrl".
    """
    filename = fileName.strip()  # VIUR4 FIXME: just for compatibility of the parameter names

    if not File.is_valid_filename(filename):
        raise errors.UnprocessableEntity(f"Invalid filename {filename!r} provided")

    # Validate the mimetype from the client seems legit
    mimetype = mimeType.strip().lower()
    valid_mimetype_chars = frozenset(string.ascii_letters + string.digits + "/-.+")
    if not (
        mimetype
        and mimetype.count("/") == 1
        and all(ch in valid_mimetype_chars for ch in mimetype)
    ):
        raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype!r} provided")

    # Validate authentication data
    if authData and authSig:
        # First, validate the signature, otherwise we don't need to proceed further
        if not self.hmac_verify(authData, authSig):
            raise errors.Unauthorized()

        authData = json.loads(base64.b64decode(authData.encode("ASCII")).decode("UTF-8"))

        if datetime.datetime.strptime(authData["validUntil"], "%Y%m%d%H%M") < datetime.datetime.now():
            raise errors.Gone("The upload URL has expired")

        if authData["validMimeTypes"]:
            for validMimeType in authData["validMimeTypes"]:
                if (
                    validMimeType == mimetype
                    or (validMimeType.endswith("*") and mimetype.startswith(validMimeType[:-1]))
                ):
                    break
            else:
                raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype} provided")

        node = authData["node"]
        maxSize = authData["maxSize"]

    else:
        rootNode = None
        if node and not (rootNode := self.getRootNode(node)):
            raise errors.NotFound(f"No valid root node found for {node=}")

        if not self.canAdd("leaf", rootNode):
            raise errors.Forbidden()

        if rootNode and public != bool(rootNode.get("public")):
            raise errors.Forbidden("Cannot upload a public file into private repository or vice versa")

        maxSize = None  # The user has some file/add permissions, don't restrict fileSize

    if maxSize:
        # Without a declared size the limit cannot be enforced; comparing None would raise a TypeError
        if size is None:
            raise errors.UnprocessableEntity(f"A size is required, as the maximum size is {maxSize}")

        if size > maxSize:
            raise errors.UnprocessableEntity(f"Size {size} exceeds maximum size {maxSize}")
    else:
        size = None

    # Create upload-URL and download key
    dlkey = utils.string.random()  # let's roll a random key

    if public:
        dlkey += PUBLIC_DLKEY_SUFFIX  # mark file as public

    blob = File.get_bucket(dlkey).blob(f"{dlkey}/source/{filename}")
    upload_url = blob.create_resumable_upload_session(content_type=mimeType, size=size, timeout=60)

    # Create a corresponding file-lock object early, otherwise we would have to ensure that the file-lock object
    # the user creates matches the file he had uploaded
    file_skel = self.addSkel("leaf")

    file_skel["name"] = filename + self.PENDING_POSTFIX
    file_skel["size"] = 0
    file_skel["mimetype"] = "application/octetstream"
    file_skel["dlkey"] = dlkey
    file_skel["parentdir"] = None
    file_skel["pendingparententry"] = db.keyHelper(node, self.addSkel("node").kindName) if node else None
    file_skel["pending"] = True
    file_skel["weak"] = True
    file_skel["public"] = public
    file_skel["width"] = 0
    file_skel["height"] = 0

    file_skel.write()
    key = str(file_skel["key"])

    # Mark that entry dirty as we might never receive an add
    self.mark_for_deletion(dlkey)

    # In this case, we'd have to store the key in the users session so he can call add() later on
    if authData and authSig:
        session = current.session.get()

        if "pendingFileUploadKeys" not in session:
            session["pendingFileUploadKeys"] = []

        session["pendingFileUploadKeys"].append(key)

        # Clamp to the latest 50 pending uploads
        session["pendingFileUploadKeys"] = session["pendingFileUploadKeys"][-50:]
        session.markChanged()

    return self.render.view({
        "uploadKey": key,
        "uploadUrl": upload_url,
    })

967 

@exposed
def download(self, blobKey: str, fileName: str = "", download: bool = False, sig: str = "", *args, **kwargs):
    """
    Download a file.

    :param blobKey: The unique blob key of the file. It must be an urlsafe-base64 encoded string
        that decodes to NUL-separated values ``path, validUntil[, download_filename]``.
    :param fileName: Optional filename to provide in the header.
    :param download: Set header to attachment retrieval, set explicitly to "1" if download is wanted.
    :param sig: HMAC signature of ``blobKey``. If omitted, the current user needs
        "root" or "file-view" access.

    :raises errors.UnprocessableEntity: If ``fileName`` is not a valid filename.
    :raises errors.BadRequest: If ``blobKey`` cannot be decoded or holds an unexpected number of values.
    :raises errors.Unauthorized: If no signature is given and nobody is logged in.
    :raises errors.Forbidden: If the user lacks access, or the signature does not verify.
    :raises errors.Gone: If the link has expired or the blob does not exist.
    :raises errors.Redirect: When the file is served through a signed URL.
    """
    if filename := fileName.strip():
        if not File.is_valid_filename(filename):
            raise errors.UnprocessableEntity(f"The provided filename {filename!r} is invalid!")

    # Decode the key exactly once. binascii.Error and UnicodeDecodeError are both ValueErrors,
    # so a malformed key is answered with a client error instead of an unhandled exception.
    try:
        values = base64.urlsafe_b64decode(blobKey).decode("UTF-8").split("\0")
    except ValueError:
        raise errors.BadRequest("Invalid encoding of blob key") from None

    if len(values) == 3:
        dlPath, validUntil, download_filename = values
    elif len(values) == 2:  # It's the old format, without a download filename
        dlPath, validUntil = values
        download_filename = ""
    else:
        raise errors.BadRequest("The blob key has an invalid amount of encoded information")

    bucket = File.get_bucket(dlPath.split("/", 1)[0])

    if not sig:
        # Check if the current user has the right to download *any* blob present in this application.
        # blobKey is then the path inside cloudstore - not a base64 encoded tuple
        # NOTE(review): the key has still been base64-decoded above to find the bucket, while the
        # raw blobKey is used as the blob path here - confirm which format unsigned callers send.
        if not (usr := current.user.get()):
            raise errors.Unauthorized()
        if "root" not in usr["access"] and "file-view" not in usr["access"]:
            raise errors.Forbidden()
        validUntil = "-1"  # Prevent this from being cached down below
        blob = bucket.get_blob(blobKey)

    else:
        # We got a request including a signature (probably a guest or a user without file-view access)
        # First, validate the signature, otherwise we don't need to proceed any further
        if not self.hmac_verify(blobKey, sig):
            raise errors.Forbidden()

        if validUntil != "0" and datetime.datetime.strptime(validUntil, "%Y%m%d%H%M") < datetime.datetime.now():
            blob = None
        else:
            blob = bucket.get_blob(dlPath)

    if not blob:
        raise errors.Gone("The requested blob has expired.")

    if not filename:
        filename = download_filename or urlquote(blob.name.rsplit("/", 1)[-1])

    content_disposition = "; ".join(
        item for item in (
            "attachment" if download else None,
            f"filename={filename}" if filename else None,
        ) if item
    )

    if isinstance(_CREDENTIALS, ServiceAccountCredentials):
        expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
        signedUrl = blob.generate_signed_url(expiresAt, response_disposition=content_disposition, version="v4")
        raise errors.Redirect(signedUrl)

    elif conf.instance.is_dev_server:  # No Service-Account to sign with - Serve everything directly
        response = current.request.get().response
        response.headers["Content-Type"] = blob.content_type
        if content_disposition:
            response.headers["Content-Disposition"] = content_disposition
        return blob.download_as_bytes()

    if validUntil == "0" or blobKey.endswith(PUBLIC_DLKEY_SUFFIX):  # It's an indefinitely valid URL
        if blob.size < 5 * 1024 * 1024:  # Less than 5 MB - Serve directly and push it into the edge caches
            response = current.request.get().response
            response.headers["Content-Type"] = blob.content_type
            response.headers["Cache-Control"] = "public, max-age=604800"  # 7 Days
            if content_disposition:
                response.headers["Content-Disposition"] = content_disposition
            return blob.download_as_bytes()

    # Default fallback - create a signed URL and redirect
    authRequest = google.auth.transport.requests.Request()
    expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
    signing_credentials = google.auth.compute_engine.IDTokenCredentials(authRequest, "")
    signedUrl = blob.generate_signed_url(
        expiresAt,
        credentials=signing_credentials,
        response_disposition=content_disposition,
        version="v4")

    raise errors.Redirect(signedUrl)

1057 

# Option shorthands accepted in the dash-separated `options` parameter of serve().
SERVE_VALID_OPTIONS = {
    "c",  # crop
    "p",  # face crop
    "fv",  # vertical flip
    "fh",  # horizontal flip
    "r90",  # rotate by 90 degrees
    "r180",  # rotate by 180 degrees
    "r270",  # rotate by 270 degrees
    "nu",  # no upscale
}
"""
Valid modification option shorts for the serve-function.
This is passed-through to the Google UserContent API, and hast to be supported there.
"""

# Maps the filename extension accepted by serve() to the format option that serve()
# appends to the option string of the requested URL.
SERVE_VALID_FORMATS = {
    "jpg": "rj",
    "jpeg": "rj",
    "png": "rp",
    "webp": "rw",
}
"""
Valid file-formats to the serve-function.
This is passed-through to the Google UserContent API, and hast to be supported there.
"""

1083 

@exposed
def serve(
    self,
    host: str,
    key: str,
    size: t.Optional[int] = None,
    filename: t.Optional[str] = None,
    options: str = "",
    download: bool = False,
):
    """
    Requests an image using the serving url to bypass direct Google requests.

    :param host: the google host prefix i.e. lh3
    :param key: the serving url key
    :param size: the target image size
    :param filename: a random string with an extension, valid extensions are defined in
        File.SERVE_VALID_FORMATS. Defaults to the "webp" format when omitted.
    :param options: dash-separated options (defined in File.SERVE_VALID_OPTIONS).
        c - crop
        p - face crop
        fv - vertical flip
        fh - horizontal flip
        rXXX - rotate 90, 180, 270
        nu - no upscale
    :param download: Serves the content as download (Content-Disposition) or not.

    :return: Returns the requested content on success, raises a proper HTTP exception otherwise.

    :raises errors.BadRequest: On invalid host characters, invalid options, or a failed upstream fetch.
    :raises errors.UnprocessableEntity: On an invalid filename or an unsupported file extension.
    """
    if any(c not in conf.search_valid_chars for c in host):
        raise errors.BadRequest("host contains invalid characters")

    # extract format from filename
    file_fmt = "webp"

    if filename:
        # The filename ends up in a response header, so validate it like download() does.
        if not File.is_valid_filename(filename):
            raise errors.UnprocessableEntity(f"The provided filename {filename!r} is invalid!")

        fmt = filename.rsplit(".", 1)[-1].lower()
        if fmt not in self.SERVE_VALID_FORMATS:
            raise errors.UnprocessableEntity(f"Unsupported filetype {fmt}")
        file_fmt = fmt

    option_list = options.split("-") if options else []
    if not all(param in self.SERVE_VALID_OPTIONS for param in option_list):
        raise errors.BadRequest("Invalid options provided")

    # Assemble "<size>-<options>-<format>" from the parts that are present, so that
    # empty options do not leave a stray leading or doubled dash in the URL.
    url_options = []
    if size:
        url_options.append(f"s{size}")
    url_options.extend(option_list)
    url_options.append(self.SERVE_VALID_FORMATS[file_fmt])

    url = f"https://{host}.googleusercontent.com/{key}=" + "-".join(url_options)

    answ = requests.get(url, timeout=20)
    if not answ.ok:
        logging.error(f"{answ.status_code} {answ.text}")
        raise errors.BadRequest("Unable to fetch a file with these parameters")

    # Set the headers only after a successful fetch, so the long-lived cache header
    # and the image content type are not attached to an error response.
    response = current.request.get().response
    response.headers["Content-Type"] = f"image/{file_fmt}"
    response.headers["Cache-Control"] = "public, max-age=604800"  # 7 Days

    content_disposition = "; ".join(
        item for item in (
            "attachment" if download else None,
            f"filename={filename}" if filename else None,
        ) if item
    )
    if content_disposition:
        response.headers["Content-Disposition"] = content_disposition

    return answ.content

1152 

@exposed
@force_ssl
@force_post
@skey(allow_empty=True)
def add(self, skelType: SkelType, node: db.Key | int | str | None = None, *args, **kwargs):
    """
    Add a new entry; for leafs, this finalizes a previously prepared upload.

    Files cannot be added directly, they have to be uploaded first. For ``skelType == "leaf"``,
    the pending file skeleton given by the ``key`` keyword argument is read, validated against
    the single blob stored below its dlkey, and written as a regular (non-pending) file.
    Every other skeleton type is handled by the parent implementation.

    :param skelType: The skeleton type to add ("leaf" is handled here).
    :param node: The node to add to; only passed through for non-leaf types.

    :raises errors.NotFound: If the pending file skeleton cannot be read.
    :raises errors.PreconditionFailed: If the entry is not pending, or the dlkey folder
        does not contain exactly one blob.
    :raises errors.Forbidden: If the user may not add, and the key is not listed in the
        session's pending upload keys.
    """
    if skelType != "leaf":
        return super().add(skelType, node, *args, **kwargs)

    # We need to handle leafs separately here
    upload_key = kwargs.get("key")
    skel = self.addSkel("leaf")

    if not skel.read(upload_key):
        raise errors.NotFound()

    if not skel["pending"]:
        raise errors.PreconditionFailed()

    skel["pending"] = False
    skel["parententry"] = skel["pendingparententry"]

    parent_entry = skel["parententry"]
    root_node = self.getRootNode(parent_entry) if parent_entry else None

    if not self.canAdd("leaf", root_node):
        # Check for a marker in this session (created if using a signed upload URL)
        session = current.session.get()
        pending_keys = session.get("pendingFileUploadKeys") or []
        if upload_key not in pending_keys:
            raise errors.Forbidden()
        session["pendingFileUploadKeys"].remove(upload_key)
        session.markChanged()

    # Now read the blob from the dlkey folder
    bucket = File.get_bucket(skel["dlkey"])
    found_blobs = list(bucket.list_blobs(prefix=f"""{skel["dlkey"]}/"""))

    # only one item is allowed here!
    if len(found_blobs) != 1:
        logging.error("Invalid number of blobs in folder")
        logging.error(upload_key)
        raise errors.PreconditionFailed()

    uploaded = found_blobs[0]

    # update the corresponding file skeleton
    skel["name"] = skel["name"].removesuffix(self.PENDING_POSTFIX)
    skel["mimetype"] = utils.string.escape(uploaded.content_type)
    skel["size"] = uploaded.size
    skel["parentrepo"] = root_node["key"] if root_node else None
    skel["weak"] = root_node is None
    skel["crc32c_checksum"] = base64.b64decode(uploaded.crc32c).hex()
    skel["md5_checksum"] = base64.b64decode(uploaded.md5_hash).hex()

    self.onAdd("leaf", skel)
    skel.write()
    self.onAdded("leaf", skel)

    # Add updated download-URL as the auto-generated isn't valid yet
    skel["downloadUrl"] = self.create_download_url(skel["dlkey"], skel["name"])

    return self.render.addSuccess(skel)

1215 

@exposed
def get_download_url(
    self,
    key: t.Optional[db.Key] = None,
    dlkey: t.Optional[str] = None,
    filename: t.Optional[str] = None,
    derived: bool = False,
):
    """
    Request a download url for a given file

    :param key: The key of the file
    :param dlkey: The download key of the file
    :param filename: The filename to be given. If no filename is provided
        downloadUrls for all derived files are returned in case of `derived=True`.
    :param derived: True, if a derived file download URL is being requested.

    :return: The rendered download URL, or a rendered mapping of derived filename to
        download URL when `derived=True` and no filename is given.

    :raises errors.BadRequest: If neither key nor dlkey is provided.
    :raises errors.NotFound: If the file does not exist, the requested derived file is unknown,
        or the filename does not match the file's name.
    :raises errors.Unauthorized: If the file may not be viewed.
    """
    skel = self.viewSkel("leaf")
    if dlkey is not None:
        skel = skel.all().filter("dlkey", dlkey).getSkel()
    elif key is None:
        raise errors.BadRequest("No key or dlkey provided")

    if not (skel and skel.read(key)):
        raise errors.NotFound()

    if not self.canView("leaf", skel):
        raise errors.Unauthorized()

    dlkey = skel["dlkey"]

    if derived:
        # A file without any derives may hold no "derived" value (or no "files" entry) at all;
        # treat that as "no derived files" instead of failing on the subscript.
        derived_files = (skel["derived"] or {}).get("files") or {}

        if filename is None:
            res = {
                derived_name: self.create_download_url(dlkey, derived_name, derived)
                for derived_name in derived_files
            }
            return self.render.view(res)

        # Check if the filename exists in the derives. We sign nothing that does not exist.
        if filename not in derived_files:
            raise errors.NotFound("File not in derives")

    elif filename is None:
        filename = skel["name"]

    elif filename != skel["name"]:
        raise errors.NotFound("Filename not match")

    res = self.create_download_url(dlkey, filename, derived)

    return self.render.view(res)

1264 

def onEdit(self, skelType: SkelType, skel: SkeletonInstance):
    """
    Hook run when an entry is edited; renames the stored blob when the name changed.

    The previous name is taken from a skeleton bound to ``skel.dbEntity``. If it differs from
    the new name, the blob is copied to the new path inside the same bucket and the old blob
    is deleted afterwards.

    :param skelType: The skeleton type of the edited entry.
    :param skel: The skeleton holding the new values.

    :raises errors.Gone: If the blob stored under the previous name does not exist.
    """
    super().onEdit(skelType, skel)

    previous = self.editSkel(skelType)
    previous.setEntity(skel.dbEntity)

    # name not changed we can return
    if previous["name"] == skel["name"]:
        return

    # Move Blob to new name
    # https://cloud.google.com/storage/docs/copying-renaming-moving-objects
    # NOTE(review): this reads skel["dlkey"] for every skelType - confirm node skeletons provide it.
    source_dir = f"{skel['dlkey']}/source/"
    path_before = source_dir + html.unescape(previous["name"])
    path_after = source_dir + html.unescape(skel["name"])

    bucket = File.get_bucket(skel["dlkey"])

    current_blob = bucket.get_blob(path_before)
    if not current_blob:
        raise errors.Gone()

    # if_generation_match=0 lets the copy only succeed if the target does not exist yet
    bucket.copy_blob(current_blob, bucket, path_after, if_generation_match=0)
    bucket.delete_blob(path_before)

1285 

def onAdded(self, skelType: SkelType, skel: SkeletonInstance) -> None:
    """
    Hook run after an entry was added; schedules the image metadata extraction.

    For entries whose mimetype starts with ``image/``, :meth:`set_image_meta` is called with
    the entry's key, unless the file size exceeds ``IMAGE_META_MAX_SIZE``.

    :param skelType: The skeleton type of the added entry.
    :param skel: The skeleton that was added.
    """
    super().onAdded(skelType, skel)

    # NOTE(review): "mimetype" is read for every skelType - confirm node skeletons provide it.
    if not skel["mimetype"].startswith("image/"):
        return

    if skel["size"] > self.IMAGE_META_MAX_SIZE:
        logging.warning(f"File size {skel['size']} exceeds limit {self.IMAGE_META_MAX_SIZE=}")
        return

    self.set_image_meta(skel["key"])

1293 

@CallDeferred
def set_image_meta(self, key: db.Key) -> None:
    """
    Write image metadata (height and width) to the FileSkel.

    The source blob is downloaded into memory and opened with PIL to determine its dimensions.
    Nothing is written if the file does not exist, already has both dimensions set, its blob
    is missing, or the image cannot be identified.

    :param key: The key of the file entry to update.
    """
    skel = self.editSkel("leaf", key)

    if not skel.read(key):
        logging.error(f"File {key} does not exist")
        return

    if skel["width"] and skel["height"]:
        logging.info(f'File {skel["key"]} has already {skel["width"]=} and {skel["height"]=}')
        return

    # Stored names are unescaped again to build the blob path
    file_name = html.unescape(skel["name"])
    blob_path = f"""{skel["dlkey"]}/source/{file_name}"""

    blob = self.get_bucket(skel["dlkey"]).get_blob(blob_path)
    if not blob:
        logging.error(f'Blob {blob_path} is missing in Cloud Storage!')
        return

    buffer = io.BytesIO()
    blob.download_to_file(buffer)
    buffer.seek(0)  # rewind, so PIL reads from the start

    try:
        image = Image.open(buffer)
    except Image.UnidentifiedImageError as err:  # Can't load this image
        logging.exception(f'Cannot open {skel["key"]} | {skel["name"]} to set image meta data: {err}')
        return

    dimensions = {
        "width": image.width,
        "height": image.height,
    }
    skel.patch(values=dimensions)

1325 

def mark_for_deletion(self, dlkey: str) -> None:
    """
    Adds a marker to the datastore that the file specified as *dlkey* can be deleted.

    Once the mark has been set, the data store is checked four times (default: every 4 hours)
    if the file is in use somewhere. If it is still in use, the mark goes away, otherwise
    the mark and the file are removed from the datastore. These delayed checks are necessary
    due to database inconsistency.

    If a marker for *dlkey* already exists, nothing is written.

    :param dlkey: Unique download-key of the file that shall be marked for deletion.
    """
    already_marked = db.Query("viur-deleted-files").filter("dlkey", dlkey).getEntry()
    if already_marked:
        return

    # Fresh marker, starting with a check counter of zero
    marker = db.Entity(db.Key("viur-deleted-files"))
    marker["itercount"] = 0
    marker["dlkey"] = str(dlkey)

    db.Put(marker)

1347 

1348 

@PeriodicTask(interval=datetime.timedelta(hours=4))
def startCheckForUnreferencedBlobs():
    """
    Start searching for blob locks that have been recently freed.

    Periodic task (interval: 4 hours) that only triggers doCheckForUnreferencedBlobs(),
    which does the actual work starting without a cursor.
    """
    doCheckForUnreferencedBlobs()

1355 

1356 

@CallDeferred
def doCheckForUnreferencedBlobs(cursor=None):
    """
    Mark blobs for deletion that were released by their blob-locks.

    Processes up to 100 "viur-blob-locks" entities flagged with ``has_old_blob_references``.
    For each of them the old blob references are fetched and reset in a transaction (stale
    lock objects are deleted instead). Every released blob key that is not actively referenced
    by another lock and not yet marked gets a "viur-deleted-files" marker.
    If the query provides a new cursor, the function calls itself with it to continue.

    :param cursor: Cursor to continue the "viur-blob-locks" query from, or None to start over.
    """

    def getOldBlobKeysTxn(dbKey):
        """Return the old blob references of the lock and reset them (or delete a stale lock)."""
        obj = db.Get(dbKey)
        res = obj["old_blob_references"] or []
        if obj["is_stale"]:
            db.Delete(dbKey)
        else:
            obj["has_old_blob_references"] = False
            obj["old_blob_references"] = []
            db.Put(obj)
        return res

    query = db.Query("viur-blob-locks").filter("has_old_blob_references", True).setCursor(cursor)
    for lockObj in query.run(100):
        oldBlobKeys = db.RunInTransaction(getOldBlobKeysTxn, lockObj.key)
        for blobKey in oldBlobKeys:
            if db.Query("viur-blob-locks").filter("active_blob_references =", blobKey).getEntry():
                # This blob is referenced elsewhere
                logging.info(f"Stale blob is still referenced, {blobKey}")
                continue
            # Add a marker and schedule it for deletion
            fileObj = db.Query("viur-deleted-files").filter("dlkey", blobKey).getEntry()
            if fileObj:  # It's already marked
                logging.info(f"Stale blob already marked for deletion, {blobKey}")
                # Only skip this blob. The transaction above has already cleared the lock's
                # old references, so leaving the function here would drop the remaining
                # blob keys unmarked and skip the cursor continuation below.
                continue
            fileObj = db.Entity(db.Key("viur-deleted-files"))
            fileObj["itercount"] = 0
            fileObj["dlkey"] = str(blobKey)
            logging.info(f"Stale blob marked dirty, {blobKey}")
            db.Put(fileObj)
    newCursor = query.getCursor()
    if newCursor:
        doCheckForUnreferencedBlobs(newCursor)

1391 

1392 

@PeriodicTask(interval=datetime.timedelta(hours=4))
def startCleanupDeletedFiles():
    """
    Increase deletion counter on each blob currently not referenced and delete
    it if that counter reaches maxIterCount.

    Periodic task (interval: 4 hours) that only triggers doCleanupDeletedFiles(),
    which does the actual work starting without a cursor.
    """
    doCleanupDeletedFiles()

1400 

1401 

@CallDeferred
def doCleanupDeletedFiles(cursor=None):
    """
    Process a batch of up to 100 "viur-deleted-files" markers.

    For every marker one of the following happens:

    - a marker without a dlkey is deleted,
    - a marker whose dlkey is actively referenced by a blob-lock is deleted,
    - a marker whose ``itercount`` exceeds the limit leads to the deletion of all blobs below
      the dlkey, the marker, the matching file skeletons and their serving URLs,
    - otherwise the marker's ``itercount`` is increased.

    If the query provides a new cursor, the function calls itself with it to continue.

    :param cursor: Cursor to continue the "viur-deleted-files" query from, or None to start over.
    """
    max_iter_count = 2  # How often a file will be checked for deletion

    query = db.Query("viur-deleted-files")
    if cursor:
        query.setCursor(cursor)

    for marker in query.run(100):
        if "dlkey" not in marker:
            db.Delete(marker.key)
            continue

        dlkey = marker["dlkey"]

        if db.Query("viur-blob-locks").filter("active_blob_references =", dlkey).getEntry():
            logging.info(f"""is referenced, {dlkey}""")
            db.Delete(marker.key)
            continue

        if marker["itercount"] > max_iter_count:
            logging.info(f"""Finally deleting, {dlkey}""")

            for stored_blob in File.get_bucket(dlkey).list_blobs(prefix=f"""{dlkey}/"""):
                stored_blob.delete()

            db.Delete(marker.key)

            # There should be exactly 1 or 0 of these
            for file_skel in skeletonByKind("file")().all().filter("dlkey =", dlkey).fetch(99):
                file_skel.delete()

                if file_skel["serving_url"]:
                    skel_bucket = File.get_bucket(file_skel["dlkey"])
                    gs_path = f"/gs/{skel_bucket.name}/{file_skel['dlkey']}/source/{file_skel['name']}"
                    images.delete_serving_url(blobstore.create_gs_key(gs_path))  # delete serving url
        else:
            logging.debug(f"""Increasing count, {dlkey}""")
            marker["itercount"] += 1
            db.Put(marker)

    next_cursor = query.getCursor()
    if next_cursor:
        doCleanupDeletedFiles(next_cursor)

1439 

1440 

@PeriodicTask(interval=datetime.timedelta(hours=4))
def start_delete_pending_files():
    """
    Start deletion of pending FileSkels that are older than 7 days.

    Periodic task (interval: 4 hours) that hands the matching query over to
    DeleteEntitiesIter.
    """
    outdated_pending_files = (
        FileLeafSkel().all()
        .filter("pending =", True)
        .filter("creationdate <", utils.utcNow() - datetime.timedelta(days=7))
    )
    DeleteEntitiesIter.startIterOnQuery(outdated_pending_files)

1451 

1452 

1453# DEPRECATED ATTRIBUTES HANDLING 

1454 

def __getattr__(attr: str) -> object:
    """
    Module-level attribute hook (PEP 562) handling deprecated module attributes.

    Python only calls this when the regular module attribute lookup has failed. Deprecated
    names are still resolved, but emit a DeprecationWarning and a log message naming the
    replacement.

    :param attr: Name of the requested module attribute.
    :return: The value of the deprecated attribute.
    :raises AttributeError: If *attr* is not a known deprecated attribute.
    """
    # Maps the deprecated name to (replacement hint, getter). The value is resolved lazily,
    # so it is only evaluated when that specific attribute is requested.
    deprecated = {
        # stuff prior viur-core < 3.7
        "GOOGLE_STORAGE_BUCKET": ("File.get_bucket()", lambda: _private_bucket),
    }

    if entry := deprecated.get(attr):
        replacement, resolve = entry
        msg = f"{attr} was replaced by {replacement}"
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        logging.warning(msg, stacklevel=2)
        return resolve()

    # Raise explicitly: delegating to an unbound super() object would resolve names that exist
    # on the super object itself (e.g. "__thisclass__") as if they were module attributes.
    raise AttributeError(f"module {__name__!r} has no attribute {attr!r}")