import base64
import datetime
import hashlib
import hmac
import html
import io
import json
import logging
import re
import string
import typing as t
import warnings
from collections import namedtuple
from urllib.parse import quote as urlquote, urlencode
from urllib.request import urlopen

import PIL
import PIL.ImageCms
import google.auth
import requests
from PIL import Image
from google.appengine.api import blobstore, images
from google.cloud import storage
from google.oauth2.service_account import Credentials as ServiceAccountCredentials

from viur.core import conf, current, db, errors, utils
from viur.core.bones import BaseBone, BooleanBone, KeyBone, NumericBone, StringBone
from viur.core.decorators import *
from viur.core.i18n import LanguageWrapper
from viur.core.prototypes.tree import SkelType, Tree, TreeSkel
from viur.core.skeleton import SkeletonInstance, skeletonByKind
from viur.core.tasks import CallDeferred, DeleteEntitiesIter, PeriodicTask

# Globals for connectivity

VALID_FILENAME_REGEX = re.compile(
    # || MAY NOT BE THE NAME | MADE OF SPECIAL CHARS | SPECIAL CHARS + `. `|
    r"^(?!^(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$)[^\x00-\x1F<>:\"\/\\|?*]*[^\x00-\x1F<>:\"\/\\|?*. ]$",
    re.IGNORECASE
)

_CREDENTIALS, _PROJECT_ID = google.auth.default()
GOOGLE_STORAGE_CLIENT = storage.Client(_PROJECT_ID, _CREDENTIALS)

PRIVATE_BUCKET_NAME = f"""{_PROJECT_ID}.appspot.com"""
PUBLIC_BUCKET_NAME = f"""public-dot-{_PROJECT_ID}"""
PUBLIC_DLKEY_SUFFIX = "_pub"

_private_bucket = GOOGLE_STORAGE_CLIENT.lookup_bucket(PRIVATE_BUCKET_NAME)
_public_bucket = None

# FilePath is a descriptor for ViUR file components
FilePath = namedtuple("FilePath", ("dlkey", "is_derived", "filename"))
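
# Usage sketch (illustrative only, the values are made up): parse_download_url()
# further below yields such a FilePath tuple:
#
#   fp = FilePath(dlkey="abc123", is_derived=False, filename="photo.jpg")
#   fp.dlkey  # -> "abc123"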


def importBlobFromViur2(dlKey, fileName):
    bucket = File.get_bucket(dlKey)

    if not conf.viur2import_blobsource:
        return False
    existingImport = db.Get(db.Key("viur-viur2-blobimport", dlKey))
    if existingImport:
        if existingImport["success"]:
            return existingImport["dlurl"]
        return False
    if conf.viur2import_blobsource["infoURL"]:
        try:
            importDataReq = urlopen(conf.viur2import_blobsource["infoURL"] + dlKey)
        except Exception as e:
            marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
            marker["success"] = False
            marker["error"] = "Failed URL-FETCH 1"
            db.Put(marker)
            return False
        if importDataReq.status != 200:
            marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
            marker["success"] = False
            marker["error"] = "Failed URL-FETCH 2"
            db.Put(marker)
            return False
        importData = json.loads(importDataReq.read())
        oldBlobName = conf.viur2import_blobsource["gsdir"] + "/" + importData["key"]
        srcBlob = storage.Blob(bucket=bucket,
                               name=conf.viur2import_blobsource["gsdir"] + "/" + importData["key"])
    else:
        oldBlobName = conf.viur2import_blobsource["gsdir"] + "/" + dlKey
        srcBlob = storage.Blob(bucket=bucket, name=conf.viur2import_blobsource["gsdir"] + "/" + dlKey)
    if not srcBlob.exists():
        marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
        marker["success"] = False
        marker["error"] = "Local SRC-Blob missing"
        marker["oldBlobName"] = oldBlobName
        db.Put(marker)
        return False
    bucket.rename_blob(srcBlob, f"{dlKey}/source/{fileName}")
    marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
    marker["success"] = True
    marker["old_src_key"] = dlKey
    marker["old_src_name"] = fileName
    marker["dlurl"] = File.create_download_url(dlKey, fileName, False, None)
    db.Put(marker)
    return marker["dlurl"]


def thumbnailer(fileSkel, existingFiles, params):
    file_name = html.unescape(fileSkel["name"])
    bucket = File.get_bucket(fileSkel["dlkey"])
    blob = bucket.get_blob(f"""{fileSkel["dlkey"]}/source/{file_name}""")
    if not blob:
        logging.warning(f"""Blob {fileSkel["dlkey"]}/source/{file_name} is missing from cloud storage!""")
        return
    fileData = io.BytesIO()
    blob.download_to_file(fileData)
    resList = []
    for sizeDict in params:
        fileData.seek(0)
        outData = io.BytesIO()
        try:
            img = PIL.Image.open(fileData)
        except PIL.Image.UnidentifiedImageError:  # Can't load this image; so there's no need to try other resolutions
            return []
        iccProfile = img.info.get('icc_profile')
        if iccProfile:
            # JPEGs might be encoded with a non-standard color-profile; we need to compensate for this if we convert
            # to WebP, as we'll lose this color-profile information
            f = io.BytesIO(iccProfile)
            src_profile = PIL.ImageCms.ImageCmsProfile(f)
            dst_profile = PIL.ImageCms.createProfile('sRGB')
            try:
                img = PIL.ImageCms.profileToProfile(
                    img,
                    inputProfile=src_profile,
                    outputProfile=dst_profile,
                    outputMode="RGBA" if img.has_transparency_data else "RGB")
            except Exception as e:
                logging.exception(e)
                continue
        fileExtension = sizeDict.get("fileExtension", "webp")
        if "width" in sizeDict and "height" in sizeDict:
            width = sizeDict["width"]
            height = sizeDict["height"]
            targetName = f"thumbnail-{width}-{height}.{fileExtension}"
        elif "width" in sizeDict:
            width = sizeDict["width"]
            height = int((float(img.size[1]) * float(width / float(img.size[0]))))
            targetName = f"thumbnail-w{width}.{fileExtension}"
        else:  # No default fallback - ignore
            continue
        mimeType = sizeDict.get("mimeType", "image/webp")
        img = img.resize((width, height), PIL.Image.LANCZOS)
        img.save(outData, fileExtension)
        outSize = outData.tell()
        outData.seek(0)
        targetBlob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{targetName}""")
        targetBlob.upload_from_file(outData, content_type=mimeType)
        resList.append((targetName, outSize, mimeType, {"mimetype": mimeType, "width": width, "height": height}))
    return resList
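
# Registration sketch for the thumbnailer above (assumption: it is wired up the
# same way as the cloudfunction variant documented below; the derive name
# "thumbnail" and the size list are example values):
#
#   conf.file_derivations = {"thumbnail": thumbnailer}
#   conf.derives = {"thumbnail": [{"width": 256, "height": 256}]}
#
#   # skeletons/xxx.py:
#   image = FileBone(derive=conf.derives)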


def cloudfunction_thumbnailer(fileSkel, existingFiles, params):
    """External thumbnailer for images.

    The corresponding cloudfunction can be found here:
    https://github.com/viur-framework/viur-cloudfunctions/tree/main/thumbnailer

    You can use it like so:

    main.py:

    .. code-block:: python

        from viur.core.modules.file import cloudfunction_thumbnailer

        conf.file_thumbnailer_url = "https://xxxxx.cloudfunctions.net/imagerenderer"
        conf.file_derivations = {"thumbnail": cloudfunction_thumbnailer}

        conf.derives_pdf = {
            "thumbnail": [{"width": 1920, "sites": "1,2"}]
        }

    skeletons/xxx.py:

    .. code-block:: python

        test = FileBone(derive=conf.derives_pdf)
    """
    if not conf.file_thumbnailer_url:
        raise ValueError("conf.file_thumbnailer_url is not set")

    bucket = File.get_bucket(fileSkel["dlkey"])

    def getsignedurl():
        if conf.instance.is_dev_server:
            signedUrl = File.create_download_url(fileSkel["dlkey"], fileSkel["name"])
        else:
            path = f"""{fileSkel["dlkey"]}/source/{file_name}"""
            if not (blob := bucket.get_blob(path)):
                logging.warning(f"Blob {path} is missing from cloud storage!")
                return None
            authRequest = google.auth.transport.requests.Request()
            expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
            signing_credentials = google.auth.compute_engine.IDTokenCredentials(authRequest, "")
            content_disposition = f"""filename={fileSkel["name"]}"""
            signedUrl = blob.generate_signed_url(
                expiresAt,
                credentials=signing_credentials,
                response_disposition=content_disposition,
                version="v4")
        return signedUrl

    def make_request():
        headers = {"Content-Type": "application/json"}
        data_str = base64.b64encode(json.dumps(dataDict).encode("UTF-8"))
        sig = File.hmac_sign(data_str)
        datadump = json.dumps({"dataStr": data_str.decode('ASCII'), "sign": sig})
        resp = requests.post(conf.file_thumbnailer_url, data=datadump, headers=headers, allow_redirects=False)
        if resp.status_code != 200:  # Error handling
            match resp.status_code:
                case 302:
                    # Google responds with a 302 to an auth site when the cloudfunction was not found
                    # https://cloud.google.com/functions/docs/troubleshooting#login
                    logging.error("Cloudfunction not found")
                case 404:
                    logging.error("Cloudfunction not found")
                case 403:
                    logging.error("No permission for the Cloudfunction")
                case _:
                    logging.error(
                        f"cloudfunction_thumbnailer failed with code: {resp.status_code} and data: {resp.content}")
            return

        try:
            response_data = resp.json()
        except Exception as e:
            logging.error(f"response could not be converted to json, failed with: {e=}")
            return
        if "error" in response_data:
            logging.error(f"cloudfunction_thumbnailer failed with: {response_data.get('error')}")
            return

        return response_data

    file_name = html.unescape(fileSkel["name"])

    if not (url := getsignedurl()):
        return
    dataDict = {
        "url": url,
        "name": fileSkel["name"],
        "params": params,
        "minetype": fileSkel["mimetype"],
        "baseUrl": current.request.get().request.host_url.lower(),
        "targetKey": fileSkel["dlkey"],
        "nameOnly": True
    }
    if not (derivedData := make_request()):
        return

    uploadUrls = {}
    for data in derivedData["values"]:
        fileName = File.sanitize_filename(data["name"])
        blob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{fileName}""")
        uploadUrls[fileSkel["dlkey"] + fileName] = blob.create_resumable_upload_session(
            timeout=60, content_type=data["mimeType"])

    if not (url := getsignedurl()):
        return

    dataDict["url"] = url
    dataDict["nameOnly"] = False
    dataDict["uploadUrls"] = uploadUrls

    if not (derivedData := make_request()):
        return
    reslist = []
    try:
        for derived in derivedData["values"]:
            for key, value in derived.items():
                reslist.append((key, value["size"], value["mimetype"], value["customData"]))
    except Exception as e:
        logging.error(f"cloudfunction_thumbnailer failed with: {e=}")
    return reslist


class DownloadUrlBone(BaseBone):
    """
    This bone is used to inject a freshly signed download url into a FileSkel.
    """

    def unserialize(self, skel, name):
        if "dlkey" in skel.dbEntity and "name" in skel.dbEntity:
            skel.accessedValues[name] = File.create_download_url(
                skel["dlkey"], skel["name"], expires=conf.render_json_download_url_expiration
            )
            return True

        return False


class FileLeafSkel(TreeSkel):
    """
    Default file leaf skeleton.
    """
    kindName = "file"

    size = StringBone(
        descr="Size",
        readOnly=True,
        searchable=True,
    )

    dlkey = StringBone(
        descr="Download-Key",
        readOnly=True,
    )

    name = StringBone(
        descr="Filename",
        caseSensitive=False,
        searchable=True,
        vfunc=lambda val: None if File.is_valid_filename(val) else "Invalid filename provided",
    )

    mimetype = StringBone(
        descr="MIME-Type",
        readOnly=True,
    )

    weak = BooleanBone(
        descr="Weak reference",
        readOnly=True,
        visible=False,
    )

    pending = BooleanBone(
        descr="Pending upload",
        readOnly=True,
        visible=False,
        defaultValue=False,
    )

    width = NumericBone(
        descr="Width",
        readOnly=True,
        searchable=True,
    )

    height = NumericBone(
        descr="Height",
        readOnly=True,
        searchable=True,
    )

    downloadUrl = DownloadUrlBone(
        descr="Download-URL",
        readOnly=True,
        visible=False,
    )

    derived = BaseBone(
        descr="Derived Files",
        readOnly=True,
        visible=False,
    )

    pendingparententry = KeyBone(
        descr="Pending key Reference",
        readOnly=True,
        visible=False,
    )

    crc32c_checksum = StringBone(
        descr="CRC32C checksum",
        readOnly=True,
    )

    md5_checksum = StringBone(
        descr="MD5 checksum",
        readOnly=True,
    )

    public = BooleanBone(
        descr="Public File",
        readOnly=True,
        defaultValue=False,
    )

    serving_url = StringBone(
        descr="Serving-URL",
        readOnly=True,
        params={
            "tooltip": "The 'serving_url' is only available in public file repositories.",
        }
    )

    @classmethod
    def _inject_serving_url(cls, skel: SkeletonInstance) -> None:
        """Inject the serving url for public image files into a FileSkel"""
        if (
            skel["public"]
            and skel["mimetype"]
            and skel["mimetype"].startswith("image/")
            and not skel["serving_url"]
        ):
            bucket = File.get_bucket(skel["dlkey"])
            filename = f"/gs/{bucket.name}/{skel['dlkey']}/source/{skel['name']}"

            # Trying this on the local development server will raise a
            # `google.appengine.runtime.apiproxy_errors.RPCFailedError`
            if conf.instance.is_dev_server:
                logging.warning(f"Can't inject serving_url for {filename!r} on local development server")
                return

            try:
                skel["serving_url"] = images.get_serving_url(None, secure_url=True, filename=filename)

            except Exception as e:
                logging.warning(f"Failed to create serving_url for {filename!r} with exception {e!r}")
                logging.exception(e)

    def preProcessBlobLocks(self, locks):
        """
        Ensure that our dlkey is locked even if we don't have a filebone here
        """
        if not self["weak"] and self["dlkey"]:
            locks.add(self["dlkey"])
        return locks

    @classmethod
    def refresh(cls, skel):
        super().refresh(skel)
        if conf.viur2import_blobsource:
            importData = importBlobFromViur2(skel["dlkey"], skel["name"])
            if importData:
                if not skel["downloadUrl"]:
                    skel["downloadUrl"] = importData
                skel["pendingparententry"] = None

        cls._inject_serving_url(skel)

    @classmethod
    def write(cls, skel, **kwargs):
        cls._inject_serving_url(skel)
        return super().write(skel, **kwargs)


class FileNodeSkel(TreeSkel):
    """
    Default file node skeleton.
    """
    kindName = "file_rootNode"  # FIXME: VIUR4, don't use "_rootNode" kindname

    name = StringBone(
        descr="Name",
        required=True,
        searchable=True
    )

    rootNode = BooleanBone(
        descr="Is RootNode",
        defaultValue=False,
        readOnly=True,
        visible=False,
    )

    public = BooleanBone(
        descr="Is public?",
        defaultValue=False,
        readOnly=True,
        visible=False,
    )

    viurCurrentSeoKeys = None


class File(Tree):
    PENDING_POSTFIX = " (pending)"
    DOWNLOAD_URL_PREFIX = "/file/download/"
    INTERNAL_SERVING_URL_PREFIX = "/file/serve/"
    MAX_FILENAME_LEN = 256

    IMAGE_META_MAX_SIZE: t.Final[int] = 10 * 1024 ** 2
    """Maximum size of image files that should be analysed in :meth:`set_image_meta`.
    Default: 10 MiB"""

    leafSkelCls = FileLeafSkel
    nodeSkelCls = FileNodeSkel

    handler = "tree.simple.file"
    adminInfo = {
        "icon": "folder-fill",
        "handler": handler,  # fixme: Use static handler; Remove with VIUR4!
    }

    roles = {
        "*": "view",
        "editor": ("add", "edit"),
        "admin": "*",
    }

    default_order = "name"

    # Helper functions currently reside here

    @staticmethod
    def get_bucket(dlkey: str) -> google.cloud.storage.bucket.Bucket:
        """
        Retrieves a Google Cloud Storage bucket for the given dlkey.
        """
        global _public_bucket
        if dlkey and dlkey.endswith(PUBLIC_DLKEY_SUFFIX):
            if _public_bucket or (_public_bucket := GOOGLE_STORAGE_CLIENT.lookup_bucket(PUBLIC_BUCKET_NAME)):
                return _public_bucket

            raise ValueError(
                f"""The bucket '{PUBLIC_BUCKET_NAME}' does not exist! Please create it with ACL access."""
            )

        return _private_bucket

    @staticmethod
    def is_valid_filename(filename: str) -> bool:
        """
        Verifies a valid filename.

        The filename should be valid on Linux, Mac OS and Windows.
        It should not be longer than MAX_FILENAME_LEN chars.

        Rule set: https://stackoverflow.com/a/31976060/3749896
        Regex test: https://regex101.com/r/iBYpoC/1
        """
        if len(filename) > File.MAX_FILENAME_LEN:
            return False

        return bool(re.match(VALID_FILENAME_REGEX, filename))
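
    # Illustrative checks (made-up names, following the rule set linked above):
    #
    #   File.is_valid_filename("report.pdf")  # -> True
    #   File.is_valid_filename("CON")         # -> False (reserved device name)
    #   File.is_valid_filename("draft.")      # -> False (may not end with "." or " ")
    #   File.is_valid_filename("a" * 300)     # -> False (exceeds MAX_FILENAME_LEN)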

    @staticmethod
    def hmac_sign(data: t.Any) -> str:
        assert conf.file_hmac_key is not None, "No hmac-key set!"
        if not isinstance(data, bytes):
            data = str(data).encode("UTF-8")
        return hmac.new(conf.file_hmac_key, msg=data, digestmod=hashlib.sha3_384).hexdigest()

    @staticmethod
    def hmac_verify(data: t.Any, signature: str) -> bool:
        return hmac.compare_digest(File.hmac_sign(data.encode("ASCII")), signature)
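
    # Sign/verify round-trip sketch (assumes conf.file_hmac_key is configured;
    # the payload is a made-up example). Note that hmac_verify() re-encodes its
    # input as ASCII, matching the URL-safe base64 tokens of the download URLs:
    #
    #   sig = File.hmac_sign(b"some-token")  # hex SHA3-384 HMAC digest
    #   File.hmac_verify("some-token", sig)  # -> True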

    @staticmethod
    def create_internal_serving_url(
        serving_url: str,
        size: int = 0,
        filename: str = "",
        options: str = "",
        download: bool = False
    ) -> str:
        """
        Helper function to generate an internal serving url (endpoint: /file/serve) from a Google serving url.

        This is needed to hide requests to Google, as they are routed internally; this can be the result of a
        legal requirement like GDPR.

        :param serving_url: The original serving URL as generated from FileLeafSkel._inject_serving_url()
        :param size: Optional size setting
        :param filename: Optional filename setting
        :param options: Additional options parameter passed through to /file/serve
        :param download: Download parameter passed through to /file/serve
        """
        # Split a serving URL into its components, used by serve function.
        res = re.match(
            r"^https:\/\/(.*?)\.googleusercontent\.com\/(.*?)$",
            serving_url
        )

        if not res:
            raise ValueError(f"Invalid {serving_url=!r} provided")

        # Create internal serving URL
        serving_url = File.INTERNAL_SERVING_URL_PREFIX + "/".join(res.groups())

        # Append additional parameters
        if params := {
            k: v for k, v in {
                "download": download,
                "filename": filename,
                "options": options,
                "size": size,
            }.items() if v
        }:
            serving_url += f"?{urlencode(params)}"

        return serving_url
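
    # Transformation sketch (host "lh3" and key "abc" are made-up examples):
    #
    #   File.create_internal_serving_url("https://lh3.googleusercontent.com/abc", size=400)
    #   # -> "/file/serve/lh3/abc?size=400"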

    @staticmethod
    def create_download_url(
        dlkey: str,
        filename: str,
        derived: bool = False,
        expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1),
        download_filename: t.Optional[str] = None
    ) -> str:
        """
        Utility function that creates a signed download-url for the given folder/filename combination

        :param dlkey: The GCS folder (= the download-key) for that file
        :param filename: The name of the file. Either the original filename or the name of a derived file.
        :param derived: True, if it points to a derived file, False if it points to the original uploaded file
        :param expires:
            None if the file is supposed to be public (which causes it to be cached on the Google edge caches),
            otherwise a datetime.timedelta of how long that link should be valid
        :param download_filename: If set, the browser is enforced to download this blob with the given alternate
            filename
        :return: The signed download-url relative to the current domain (e.g. /file/download/...)
        """
        if isinstance(expires, int):
            expires = datetime.timedelta(minutes=expires)

        # Undo escaping on ()= performed on fileNames
        filename = filename.replace("&#040;", "(").replace("&#041;", ")").replace("&#061;", "=")
        filepath = f"""{dlkey}/{"derived" if derived else "source"}/{filename}"""

        if download_filename:
            if not File.is_valid_filename(download_filename):
                raise errors.UnprocessableEntity(f"Invalid download_filename {download_filename!r} provided")

            download_filename = urlquote(download_filename)

        expires = (datetime.datetime.now() + expires).strftime("%Y%m%d%H%M") if expires else 0

        data = base64.urlsafe_b64encode(f"""{filepath}\0{expires}\0{download_filename or ""}""".encode("UTF-8"))
        sig = File.hmac_sign(data)

        return f"""{File.DOWNLOAD_URL_PREFIX}{data.decode("ASCII")}?sig={sig}"""
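
    # Token layout sketch: the path component after DOWNLOAD_URL_PREFIX is the
    # URL-safe base64 encoding of "<dlkey>/<source|derived>/<filename>\0<expires>\0<download_filename>",
    # signed with hmac_sign(). Example call (made-up dlkey):
    #
    #   File.create_download_url("abc123", "photo.jpg", expires=datetime.timedelta(hours=2))
    #   # -> "/file/download/<base64-data>?sig=<hex-hmac>"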

    @staticmethod
    def parse_download_url(url) -> t.Optional[FilePath]:
        """
        Parses a file download URL in the format `/file/download/xxxx?sig=yyyy` into its FilePath.

        If the URL cannot be parsed, the function returns None.

        :param url: The file download URL to be parsed.
        :return: A FilePath on success, None otherwise.
        """
        if not url.startswith(File.DOWNLOAD_URL_PREFIX) or "?" not in url:
            return None

        # Strip "/file/download/" and split on "?"
        data, sig = url.removeprefix(File.DOWNLOAD_URL_PREFIX).split("?", 1)
        sig = sig.removeprefix("sig=")

        if not File.hmac_verify(data, sig):
            # Invalid signature
            return None

        # Split the blobKey into the individual fields it should contain
        data = base64.urlsafe_b64decode(data).decode("UTF-8")

        match data.count("\0"):
            case 2:
                dlpath, valid_until, _ = data.split("\0")
            case 1:
                # It's the old format, without a downloadFileName
                dlpath, valid_until = data.split("\0")
            case _:
                # Invalid path
                return None

        if valid_until != "0" and datetime.datetime.strptime(valid_until, "%Y%m%d%H%M") < datetime.datetime.now():
            # Signature expired
            return None

        if dlpath.count("/") != 2:
            # Invalid path
            return None

        dlkey, derived, filename = dlpath.split("/")
        return FilePath(dlkey, derived != "source", filename)
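
    # Round-trip sketch (made-up dlkey): parse_download_url() inverts
    # create_download_url() as long as signature and expiry check out:
    #
    #   url = File.create_download_url("abc123", "photo.jpg")
    #   File.parse_download_url(url)
    #   # -> FilePath(dlkey="abc123", is_derived=False, filename="photo.jpg")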

    @staticmethod
    def create_src_set(
        file: t.Union["SkeletonInstance", dict, str],
        expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1),
        width: t.Optional[int] = None,
        height: t.Optional[int] = None,
        language: t.Optional[str] = None,
    ) -> str:
        """
        Generates a string suitable for use as the srcset tag in html. This functionality provides the browser
        with a list of images in different sizes and allows it to choose the smallest file that will fill its
        viewport without upscaling.

        :param file: The file skeleton (or if multiple=True a single value from it) to generate the srcset for.
        :param expires:
            None if the file is supposed to be public (which causes it to be cached on the Google edge caches),
            otherwise its lifetime in seconds
        :param width:
            A list of widths that should be included in the srcset.
            If a given width is not available, it will be skipped.
        :param height: A list of heights that should be included in the srcset. If a given height is not available,
            it will be skipped.
        :param language: Language overwrite if the file has multiple languages, and we want to explicitly specify one
        :return: The srcset generated, or an empty string if an invalid file object was supplied
        """
        if not width and not height:
            logging.error("Neither width nor height supplied")
            return ""

        if isinstance(file, str):
            file = db.Query("file").filter("dlkey =", file).order(("creationdate", db.SortOrder.Ascending)).getEntry()

        if not file:
            return ""

        if isinstance(file, LanguageWrapper):
            language = language or current.language.get()
            if not language or not (file := file.get(language)):
                return ""

        if "dlkey" not in file and "dest" in file:
            file = file["dest"]

        from viur.core.skeleton import SkeletonInstance  # avoid circular imports

        if not (
            isinstance(file, (SkeletonInstance, dict))
            and "dlkey" in file
            and "derived" in file
        ):
            logging.error("Invalid file supplied")
            return ""

        if not isinstance(file["derived"], dict):
            logging.error("No derives available")
            return ""

        src_set = []
        for filename, derivate in file["derived"]["files"].items():
            customData = derivate.get("customData", {})

            if width and customData.get("width") in width:
                src_set.append(
                    f"""{File.create_download_url(file["dlkey"], filename, True, expires)} {customData["width"]}w"""
                )

            if height and customData.get("height") in height:
                src_set.append(
                    f"""{File.create_download_url(file["dlkey"], filename, True, expires)} {customData["height"]}h"""
                )

        return ", ".join(src_set)
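
    # Usage sketch (assumes a file skeleton whose FileBone was configured with
    # derives of widths 400 and 800, as in the thumbnailer examples above):
    #
    #   srcset = File.create_src_set(skel["image"], width=[400, 800])
    #   # -> '/file/download/... 400w, /file/download/... 800w'
    #   # suitable for: <img srcset="..." src="...">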

    def write(
        self,
        filename: str,
        content: t.Any,
        mimetype: str = "text/plain",
        width: int = None,
        height: int = None,
        public: bool = False,
    ) -> db.Key:
        """
        Write a file from any buffer into the file module.

        :param filename: Filename to be written.
        :param content: The file content to be written, as bytes-like object.
        :param mimetype: The file's mimetype.
        :param width: Optional width information for the file.
        :param height: Optional height information for the file.
        :param public: True if the file should be publicly accessible.
        :return: Returns the key of the file object written. This can be associated e.g. with a FileBone.
        """
        # logging.info(f"{filename=} {mimetype=} {width=} {height=} {public=}")
        if not File.is_valid_filename(filename):
            raise ValueError(f"{filename=} is invalid")

        dl_key = utils.string.random()

        if public:
            dl_key += PUBLIC_DLKEY_SUFFIX  # mark file as public

        bucket = File.get_bucket(dl_key)

        blob = bucket.blob(f"{dl_key}/source/{filename}")
        blob.upload_from_file(io.BytesIO(content), content_type=mimetype)

        skel = self.addSkel("leaf")
        skel["name"] = filename
        skel["size"] = blob.size
        skel["mimetype"] = mimetype
        skel["dlkey"] = dl_key
        skel["weak"] = True
        skel["public"] = public
        skel["width"] = width
        skel["height"] = height
        skel["crc32c_checksum"] = base64.b64decode(blob.crc32c).hex()
        skel["md5_checksum"] = base64.b64decode(blob.md5_hash).hex()

        skel.write()
        return skel["key"]
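
    # Usage sketch (file_mod stands for the instantiated File module, however it
    # is reachable in your project; filename and content are made-up examples):
    #
    #   key = file_mod.write("hello.txt", b"Hello, World!", mimetype="text/plain")
    #   # key can now be referenced, e.g. by a FileBone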

    def read(
        self,
        key: db.Key | int | str | None = None,
        path: str | None = None,
    ) -> tuple[io.BytesIO, str]:
        """
        Read a file from the Cloud Storage.

        If a key and a path are provided, the key is preferred.
        This means that the entry in the db is searched first and if this is not found, the path is used.

        :param key: Key of the LeafSkel that contains the "dlkey" and the "name".
        :param path: The path of the file in the Cloud Storage Bucket.

        :return: Returns the file as a io.BytesIO buffer and the content-type
        """
        if not key and not path:
            raise ValueError("Please provide a key or a path")

        if key:
            skel = self.viewSkel("leaf")
            if not skel.read(db.keyHelper(key, skel.kindName)):
                if not path:
                    raise ValueError("This skeleton is not in the database!")
            else:
                path = f"""{skel["dlkey"]}/source/{skel["name"]}"""

            bucket = File.get_bucket(skel["dlkey"])
        else:
            bucket = File.get_bucket(path.split("/", 1)[0])  # path's first part is the dlkey plus an eventual postfix

        blob = bucket.blob(path)
        return io.BytesIO(blob.download_as_bytes()), blob.content_type
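
    # Usage sketch (complements write() above; key and path are made up):
    #
    #   buf, content_type = file_mod.read(key=key)
    #   buf, content_type = file_mod.read(path="abc123/source/hello.txt")
    #   buf.read()  # -> b"Hello, World!"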

    @CallDeferred
    def deleteRecursive(self, parentKey):
        files = db.Query(self.leafSkelCls().kindName).filter("parentdir =", parentKey).iter()
        for fileEntry in files:
            self.mark_for_deletion(fileEntry["dlkey"])
            skel = self.leafSkelCls()

            if skel.read(str(fileEntry.key())):
                skel.delete()
        dirs = db.Query(self.nodeSkelCls().kindName).filter("parentdir", parentKey).iter()
        for d in dirs:
            self.deleteRecursive(d.key)
            skel = self.nodeSkelCls()
            if skel.read(d.key):
                skel.delete()

    @exposed
    @skey
    def getUploadURL(
        self,
        fileName: str,
        mimeType: str,
        size: t.Optional[int] = None,
        node: t.Optional[str | db.Key] = None,
        authData: t.Optional[str] = None,
        authSig: t.Optional[str] = None,
        public: bool = False,
    ):
        filename = fileName.strip()  # VIUR4 FIXME: just for compatibility of the parameter names

        if not File.is_valid_filename(filename):
            raise errors.UnprocessableEntity(f"Invalid filename {filename!r} provided")

        # Validate that the mimetype from the client seems legit
        mimetype = mimeType.strip().lower()
        if not (
            mimetype
            and mimetype.count("/") == 1
            and all(ch in string.ascii_letters + string.digits + "/-.+" for ch in mimetype)
        ):
            raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype!r} provided")

        # Validate authentication data
        if authData and authSig:
            # First, validate the signature, otherwise we don't need to proceed further
            if not self.hmac_verify(authData, authSig):
                raise errors.Unauthorized()

            authData = json.loads(base64.b64decode(authData.encode("ASCII")).decode("UTF-8"))

            if datetime.datetime.strptime(authData["validUntil"], "%Y%m%d%H%M") < datetime.datetime.now():
                raise errors.Gone("The upload URL has expired")

            if authData["validMimeTypes"]:
                for validMimeType in authData["validMimeTypes"]:
                    if (
                        validMimeType == mimetype
                        or (validMimeType.endswith("*") and mimetype.startswith(validMimeType[:-1]))
                    ):
                        break
                else:
                    raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype} provided")

            node = authData["node"]
            maxSize = authData["maxSize"]

        else:
            rootNode = None
            if node and not (rootNode := self.getRootNode(node)):
                raise errors.NotFound(f"No valid root node found for {node=}")

            if not self.canAdd("leaf", rootNode):
                raise errors.Forbidden()

            if rootNode and public != bool(rootNode.get("public")):
                raise errors.Forbidden("Cannot upload a public file into private repository or vice versa")

            maxSize = None  # The user has some file/add permissions, don't restrict fileSize

        if maxSize:
            if size > maxSize:
                raise errors.UnprocessableEntity(f"Size {size} exceeds maximum size {maxSize}")
        else:
            size = None

        # Create upload-URL and download key
        dlkey = utils.string.random()  # let's roll a random key

        if public:
            dlkey += PUBLIC_DLKEY_SUFFIX  # mark file as public

        blob = File.get_bucket(dlkey).blob(f"{dlkey}/source/{filename}")
        upload_url = blob.create_resumable_upload_session(content_type=mimeType, size=size, timeout=60)

        # Create a corresponding file-lock object early, otherwise we would have to ensure that the file-lock object
        # the user creates matches the file he had uploaded
        file_skel = self.addSkel("leaf")

        file_skel["name"] = filename + self.PENDING_POSTFIX
        file_skel["size"] = 0
        file_skel["mimetype"] = "application/octetstream"
        file_skel["dlkey"] = dlkey
        file_skel["parentdir"] = None
        file_skel["pendingparententry"] = db.keyHelper(node, self.addSkel("node").kindName) if node else None
        file_skel["pending"] = True
        file_skel["weak"] = True
        file_skel["public"] = public
        file_skel["width"] = 0
        file_skel["height"] = 0

        file_skel.write()
        key = str(file_skel["key"])

        # Mark that entry dirty as we might never receive an add
        self.mark_for_deletion(dlkey)

        # In this case, we'd have to store the key in the user's session so he can call add() later on
        if authData and authSig:
            session = current.session.get()

            if "pendingFileUploadKeys" not in session:
                session["pendingFileUploadKeys"] = []

            session["pendingFileUploadKeys"].append(key)

            # Clamp to the latest 50 pending uploads
            session["pendingFileUploadKeys"] = session["pendingFileUploadKeys"][-50:]
            session.markChanged()

        return self.render.view({
            "uploadKey": key,
            "uploadUrl": upload_url,
        })
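
    # Client flow sketch (endpoints as defined in this module; the resumable
    # upload itself follows the Google Cloud Storage protocol):
    #
    #   1. POST /file/getUploadURL with fileName + mimeType (+ skey)
    #      -> {"uploadKey": ..., "uploadUrl": ...}
    #   2. Upload the file bytes to uploadUrl (resumable upload session).
    #   3. POST /file/add with skelType="leaf" and key=uploadKey to finalize
    #      the pending FileSkel (see add() below).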

    @exposed
    def download(self, blobKey: str, fileName: str = "", download: bool = False, sig: str = "", *args, **kwargs):
        """
        Download a file.

        :param blobKey: The unique blob key of the file.
        :param fileName: Optional filename to provide in the header.
        :param download: Set header to attachment retrieval, set explicitly to "1" if download is wanted.
        """
        if filename := fileName.strip():
            if not File.is_valid_filename(filename):
                raise errors.UnprocessableEntity(f"The provided filename {filename!r} is invalid!")

        download_filename = ""

        try:
            dlPath, validUntil, download_filename = base64.urlsafe_b64decode(blobKey).decode("UTF-8").split("\0")
        except Exception as e:  # It's the old format, without a downloadFileName
            dlPath, validUntil = base64.urlsafe_b64decode(blobKey).decode("UTF-8").split("\0")

        bucket = File.get_bucket(dlPath.split("/", 1)[0])

        if not sig:
            # Check if the current user has the right to download *any* blob present in this application.
            # blobKey is then the path inside cloudstore - not a base64 encoded tuple
            if not (usr := current.user.get()):
                raise errors.Unauthorized()
            if "root" not in usr["access"] and "file-view" not in usr["access"]:
                raise errors.Forbidden()
            validUntil = "-1"  # Prevent this from being cached down below
            blob = bucket.get_blob(blobKey)

        else:
            # We got a request including a signature (probably a guest or a user without file-view access)
            # First, validate the signature, otherwise we don't need to proceed any further
            if not self.hmac_verify(blobKey, sig):
                raise errors.Forbidden()

            if validUntil != "0" and datetime.datetime.strptime(validUntil, "%Y%m%d%H%M") < datetime.datetime.now():
                blob = None
            else:
                blob = bucket.get_blob(dlPath)

        if not blob:
            raise errors.Gone("The requested blob has expired.")

        if not filename:
            filename = download_filename or urlquote(blob.name.rsplit("/", 1)[-1])

        content_disposition = "; ".join(
            item for item in (
                "attachment" if download else None,
                f"filename={filename}" if filename else None,
            ) if item
        )

        if isinstance(_CREDENTIALS, ServiceAccountCredentials):
            expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
            signedUrl = blob.generate_signed_url(expiresAt, response_disposition=content_disposition, version="v4")
            raise errors.Redirect(signedUrl)

        elif conf.instance.is_dev_server:  # No service account to sign with - serve everything directly
            response = current.request.get().response
            response.headers["Content-Type"] = blob.content_type
            if content_disposition:
                response.headers["Content-Disposition"] = content_disposition
            return blob.download_as_bytes()

        if validUntil == "0" or blobKey.endswith(PUBLIC_DLKEY_SUFFIX):  # It's an indefinitely valid URL
            if blob.size < 5 * 1024 * 1024:  # Less than 5 MB - serve directly and push it into the edge caches
                response = current.request.get().response
                response.headers["Content-Type"] = blob.content_type
                response.headers["Cache-Control"] = "public, max-age=604800"  # 7 days
                if content_disposition:
                    response.headers["Content-Disposition"] = content_disposition
                return blob.download_as_bytes()

        # Default fallback - create a signed URL and redirect
        authRequest = google.auth.transport.requests.Request()
        expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
        signing_credentials = google.auth.compute_engine.IDTokenCredentials(authRequest, "")
        signedUrl = blob.generate_signed_url(
            expiresAt,
            credentials=signing_credentials,
            response_disposition=content_disposition,
            version="v4")

        raise errors.Redirect(signedUrl)

    SERVE_VALID_OPTIONS = {
        "c",
        "p",
        "fv",
        "fh",
        "r90",
        "r180",
        "r270",
        "nu",
    }
    """
    Valid modification option shorts for the serve-function.
    This is passed-through to the Google UserContent API, and has to be supported there.
    """

    SERVE_VALID_FORMATS = {
        "jpg": "rj",
        "jpeg": "rj",
        "png": "rp",
        "webp": "rw",
    }
    """
    Valid file-formats to the serve-function.
    This is passed-through to the Google UserContent API, and has to be supported there.
    """

    @exposed
    def serve(
        self,
        host: str,
        key: str,
        size: t.Optional[int] = None,
        filename: t.Optional[str] = None,
        options: str = "",
        download: bool = False,
    ):
        """
        Requests an image using the serving url to bypass direct Google requests.

        :param host: the Google host prefix, i.e. lh3
        :param key: the serving url key
        :param size: the target image size
        :param filename: a random string with an extension; valid extensions are defined in File.SERVE_VALID_FORMATS.
        :param options: `-`-separated options (defined in File.SERVE_VALID_OPTIONS):
            c - crop
            p - face crop
            fv - vertical flip
            fh - horizontal flip
            rXXX - rotate 90, 180, 270
            nu - no upscale
        :param download: Serves the content as download (Content-Disposition) or not.

        :return: Returns the requested content on success, raises a proper HTTP exception otherwise.
        """
        if any(c not in conf.search_valid_chars for c in host):
            raise errors.BadRequest("host contains invalid characters")

        # extract the format from the filename
        file_fmt = "webp"

        if filename:
            fmt = filename.rsplit(".", 1)[-1].lower()
            if fmt in self.SERVE_VALID_FORMATS:
                file_fmt = fmt
            else:
                raise errors.UnprocessableEntity(f"Unsupported filetype {fmt}")

        url = f"https://{host}.googleusercontent.com/{key}"

        if options and not all(param in self.SERVE_VALID_OPTIONS for param in options.split("-")):
            raise errors.BadRequest("Invalid options provided")

        options += f"-{self.SERVE_VALID_FORMATS[file_fmt]}"

        if size:
            options = f"s{size}-" + options

        url += "=" + options

        response = current.request.get().response
        response.headers["Content-Type"] = f"image/{file_fmt}"
        response.headers["Cache-Control"] = "public, max-age=604800"  # 7 days
        if download:
            response.headers["Content-Disposition"] = f"attachment; filename={filename}"
        else:
            response.headers["Content-Disposition"] = f"filename={filename}"

        answ = requests.get(url, timeout=20)
        if not answ.ok:
            logging.error(f"{answ.status_code} {answ.text}")
            raise errors.BadRequest("Unable to fetch a file with these parameters")

        return answ.content

    @exposed
    @force_ssl
    @force_post
    @skey(allow_empty=True)
    def add(self, skelType: SkelType, node: db.Key | int | str | None = None, *args, **kwargs):
        # We can't add files directly (they need to be uploaded)
        if skelType == "leaf":  # We need to handle leafs separately here
            targetKey = kwargs.get("key")
            skel = self.addSkel("leaf")

            if not skel.read(targetKey):
                raise errors.NotFound()

            if not skel["pending"]:
                raise errors.PreconditionFailed()

            skel["pending"] = False
            skel["parententry"] = skel["pendingparententry"]

            if skel["parententry"]:
                rootNode = self.getRootNode(skel["parententry"])
            else:
                rootNode = None

            if not self.canAdd("leaf", rootNode):
                # Check for a marker in this session (created if using a signed upload URL)
                session = current.session.get()
                if targetKey not in (session.get("pendingFileUploadKeys") or []):
                    raise errors.Forbidden()
                session["pendingFileUploadKeys"].remove(targetKey)
                session.markChanged()

            # Now read the blob from the dlkey folder
            bucket = File.get_bucket(skel["dlkey"])

            blobs = list(bucket.list_blobs(prefix=f"""{skel["dlkey"]}/"""))
            if len(blobs) != 1:
                logging.error("Invalid number of blobs in folder")
                logging.error(targetKey)
                raise errors.PreconditionFailed()

            # only one item is allowed here!
            blob = blobs[0]

            # update the corresponding file skeleton
            skel["name"] = skel["name"].removesuffix(self.PENDING_POSTFIX)
            skel["mimetype"] = utils.string.escape(blob.content_type)
            skel["size"] = blob.size
            skel["parentrepo"] = rootNode["key"] if rootNode else None
            skel["weak"] = rootNode is None
            skel["crc32c_checksum"] = base64.b64decode(blob.crc32c).hex()
            skel["md5_checksum"] = base64.b64decode(blob.md5_hash).hex()
            self.onAdd("leaf", skel)
            skel.write()
            self.onAdded("leaf", skel)

            # Add an updated download-URL, as the auto-generated one isn't valid yet
            skel["downloadUrl"] = self.create_download_url(skel["dlkey"], skel["name"])

            return self.render.addSuccess(skel)

        return super().add(skelType, node, *args, **kwargs)

    @exposed
    def get_download_url(
        self,
        key: t.Optional[db.Key] = None,
        dlkey: t.Optional[str] = None,
        filename: t.Optional[str] = None,
        derived: bool = False,
    ):
        """
        Request a download url for a given file.

        :param key: The key of the file.
        :param dlkey: The download key of the file.
        :param filename: The filename to be given. If no filename is provided,
            downloadUrls for all derived files are returned in case of `derived=True`.
        :param derived: True, if a derived file download URL is being requested.
        """
        skel = self.viewSkel("leaf")
        if dlkey is not None:
            skel = skel.all().filter("dlkey", dlkey).getSkel()
        elif key is None and dlkey is None:
            raise errors.BadRequest("No key or dlkey provided")

        if not (skel and skel.read(key)):
            raise errors.NotFound()

        if not self.canView("leaf", skel):
            raise errors.Unauthorized()

        dlkey = skel["dlkey"]

        if derived and filename is None:
            res = {}
            for filename in skel["derived"]["files"]:
                res[filename] = self.create_download_url(dlkey, filename, derived)
        else:
            if derived:
                # Check if the filename exists in the derives; we sign nothing that does not exist.
                if filename not in skel["derived"]["files"]:
                    raise errors.NotFound("File not in derives")
            else:
                if filename is None:
                    filename = skel["name"]
                elif filename != skel["name"]:
                    raise errors.NotFound("Filename does not match")

            res = self.create_download_url(dlkey, filename, derived)

        return self.render.view(res)

    def onEdit(self, skelType: SkelType, skel: SkeletonInstance):
        super().onEdit(skelType, skel)
        old_skel = self.editSkel(skelType)
        old_skel.setEntity(skel.dbEntity)

        if old_skel["name"] == skel["name"]:  # name hasn't changed, we can return
            return

        # Move the blob to the new name
        # https://cloud.google.com/storage/docs/copying-renaming-moving-objects
        old_path = f"{skel['dlkey']}/source/{html.unescape(old_skel['name'])}"
        new_path = f"{skel['dlkey']}/source/{html.unescape(skel['name'])}"

        bucket = File.get_bucket(skel['dlkey'])

        if not (old_blob := bucket.get_blob(old_path)):
            raise errors.Gone()

        bucket.copy_blob(old_blob, bucket, new_path, if_generation_match=0)
        bucket.delete_blob(old_path)

    def onAdded(self, skelType: SkelType, skel: SkeletonInstance) -> None:
        super().onAdded(skelType, skel)
        if skel["mimetype"].startswith("image/"):
            if skel["size"] > self.IMAGE_META_MAX_SIZE:
                logging.warning(f"File size {skel['size']} exceeds limit {self.IMAGE_META_MAX_SIZE=}")
                return
            self.set_image_meta(skel["key"])

    @CallDeferred
    def set_image_meta(self, key: db.Key) -> None:
        """Write image metadata (height and width) to FileSkel"""
        skel = self.editSkel("leaf", key)
        if not skel.read(key):
            logging.error(f"File {key} does not exist")
            return
        if skel["width"] and skel["height"]:
            logging.info(f'File {skel["key"]} already has {skel["width"]=} and {skel["height"]=}')
            return

        file_name = html.unescape(skel["name"])
        blob = self.get_bucket(skel["dlkey"]).get_blob(f"""{skel["dlkey"]}/source/{file_name}""")
        if not blob:
            logging.error(f'Blob {skel["dlkey"]}/source/{file_name} is missing in Cloud Storage!')
            return

        file_obj = io.BytesIO()
        blob.download_to_file(file_obj)
        file_obj.seek(0)
        try:
            img = Image.open(file_obj)
        except Image.UnidentifiedImageError as e:  # Can't load this image
            logging.exception(f'Cannot open {skel["key"]} | {skel["name"]} to set image meta data: {e}')
            return

        skel.patch(
            values={
                "width": img.width,
                "height": img.height,
            },
        )

    def mark_for_deletion(self, dlkey: str) -> None:
        """
        Adds a marker to the datastore that the file specified as *dlkey* can be deleted.

        Once the mark has been set, the data store is checked four times (default: every 4 hours)
        if the file is in use somewhere. If it is still in use, the mark goes away, otherwise
        the mark and the file are removed from the datastore. These delayed checks are necessary
        due to database inconsistency.

        :param dlkey: Unique download-key of the file that shall be marked for deletion.
        """
        fileObj = db.Query("viur-deleted-files").filter("dlkey", dlkey).getEntry()

        if fileObj:  # It's already marked
            return

        fileObj = db.Entity(db.Key("viur-deleted-files"))
        fileObj["itercount"] = 0
        fileObj["dlkey"] = str(dlkey)

        db.Put(fileObj)


@PeriodicTask(interval=datetime.timedelta(hours=4))
def startCheckForUnreferencedBlobs():
    """
    Start searching for blob locks that have been recently freed
    """
    doCheckForUnreferencedBlobs()


@CallDeferred
def doCheckForUnreferencedBlobs(cursor=None):
    def getOldBlobKeysTxn(dbKey):
        obj = db.Get(dbKey)
        res = obj["old_blob_references"] or []
        if obj["is_stale"]:
            db.Delete(dbKey)
        else:
            obj["has_old_blob_references"] = False
            obj["old_blob_references"] = []
            db.Put(obj)
        return res

    query = db.Query("viur-blob-locks").filter("has_old_blob_references", True).setCursor(cursor)
    for lockObj in query.run(100):
        oldBlobKeys = db.RunInTransaction(getOldBlobKeysTxn, lockObj.key)
        for blobKey in oldBlobKeys:
            if db.Query("viur-blob-locks").filter("active_blob_references =", blobKey).getEntry():
                # This blob is referenced elsewhere
                logging.info(f"Stale blob is still referenced, {blobKey}")
                continue
            # Add a marker and schedule it for deletion
            fileObj = db.Query("viur-deleted-files").filter("dlkey", blobKey).getEntry()
            if fileObj:  # It's already marked
                logging.info(f"Stale blob already marked for deletion, {blobKey}")
                return
            fileObj = db.Entity(db.Key("viur-deleted-files"))
            fileObj["itercount"] = 0
            fileObj["dlkey"] = str(blobKey)
            logging.info(f"Stale blob marked dirty, {blobKey}")
            db.Put(fileObj)
    newCursor = query.getCursor()
    if newCursor:
        doCheckForUnreferencedBlobs(newCursor)


@PeriodicTask(interval=datetime.timedelta(hours=4))
def startCleanupDeletedFiles():
    """
    Increase deletion counter on each blob currently not referenced and delete
    it if that counter reaches maxIterCount
    """
    doCleanupDeletedFiles()


@CallDeferred
def doCleanupDeletedFiles(cursor=None):
    maxIterCount = 2  # How often a file will be checked for deletion
    query = db.Query("viur-deleted-files")
    if cursor:
        query.setCursor(cursor)
    for file in query.run(100):
        if "dlkey" not in file:
            db.Delete(file.key)
        elif db.Query("viur-blob-locks").filter("active_blob_references =", file["dlkey"]).getEntry():
            logging.info(f"""is referenced, {file["dlkey"]}""")
            db.Delete(file.key)
        else:
            if file["itercount"] > maxIterCount:
                logging.info(f"""Finally deleting, {file["dlkey"]}""")
                bucket = File.get_bucket(file["dlkey"])
                blobs = bucket.list_blobs(prefix=f"""{file["dlkey"]}/""")
                for blob in blobs:
                    blob.delete()
                db.Delete(file.key)
                # There should be exactly 1 or 0 of these
                for f in skeletonByKind("file")().all().filter("dlkey =", file["dlkey"]).fetch(99):
                    f.delete()

                    if f["serving_url"]:
                        bucket = File.get_bucket(f["dlkey"])
                        blob_key = blobstore.create_gs_key(
                            f"/gs/{bucket.name}/{f['dlkey']}/source/{f['name']}"
                        )
                        images.delete_serving_url(blob_key)  # delete the serving url
            else:
                logging.debug(f"""Increasing count, {file["dlkey"]}""")
                file["itercount"] += 1
                db.Put(file)
    newCursor = query.getCursor()
    if newCursor:
        doCleanupDeletedFiles(newCursor)


@PeriodicTask(interval=datetime.timedelta(hours=4))
def start_delete_pending_files():
    """
    Start deletion of pending FileSkels that are older than 7 days.
    """
    DeleteEntitiesIter.startIterOnQuery(
        FileLeafSkel().all()
        .filter("pending =", True)
        .filter("creationdate <", utils.utcNow() - datetime.timedelta(days=7))
    )


# DEPRECATED ATTRIBUTES HANDLING

def __getattr__(attr: str) -> object:
    if entry := {
        # stuff prior viur-core < 3.7
        "GOOGLE_STORAGE_BUCKET": ("File.get_bucket()", _private_bucket),
    }.get(attr):
        msg = f"{attr} was replaced by {entry[0]}"
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        logging.warning(msg, stacklevel=2)
        return entry[1]

    return super(__import__(__name__).__class__).__getattribute__(attr)