I have a list of file names that I need to search for on Azure. Right now, as a noob, I am looping over every blob name and comparing strings, but I think there has to be an easier a…
Please use the `exists` method in the Azure Storage Python SDK.
def ifblob_exists(filename):
    """Print whether the blob named *filename* exists in the container.

    Builds a ``BlockBlobService`` from the externally defined ``accountName``
    and ``accountKey`` and asks it whether the blob is present in the
    container, then prints the outcome.

    Args:
        filename: Name of the blob to look up.
    """
    container_name = '***'
    block_blob_service = BlockBlobService(account_name=accountName, account_key=accountKey,
                                          socket_timeout=10000)
    # No exception handler was visible for the original ``try:``, so errors
    # raised by the SDK call propagate to the caller instead of being hidden.
    if block_blob_service.exists(container_name, filename):
        print("\t Blob exists :" + " " + filename)
    else:
        # The original printed "Blob exists" here as well, which reported a
        # missing blob as present.
        print("\t Blob does not exist :" + " " + filename)
Of course, if you have a list of filenames, you still need to call the function above in a loop, once per name.
Hope it helps you.
Listing all blobs is a very costly operation inside the Azure Storage infrastructure because it translates into a full scan.
Find below an example to efficiently check if the blob (e.g. filename in your case) exists or not in a given container:
from azure.storage.blob import BlockBlobService
from datetime import datetime
def check_if_blob_exists(container_name: str, blob_names: list) -> None:
    """Print, for each name in *blob_names*, whether that blob exists.

    Each name is checked with a separate ``exists`` call on a
    ``BlockBlobService`` client, and the total elapsed time is printed at
    the end.

    Args:
        container_name: Name of the container to look in.
        blob_names: Names of the blobs to check.

    Raises:
        ValueError: If *container_name* is None, empty or whitespace, or if
            *blob_names* is None or empty.
    """
    # Validate before starting the timer so that bad input fails immediately
    # and the reported time covers only the storage work.
    if not container_name or container_name.isspace():
        raise ValueError("Container name cannot be none, empty or whitespace.")
    if not blob_names:
        raise ValueError("Block blob names cannot be none or empty.")

    start_time = datetime.now()

    # The placeholders must be replaced with real credentials before running.
    block_blob_service = BlockBlobService(account_name="{Storage Account Name}", account_key="{Storage Account Key}")

    for blob_name in blob_names:
        if block_blob_service.exists(container_name, blob_name):
            print("\nBlob '{0}' found!".format(blob_name))
        else:
            print("\nBlob '{0}' NOT found!".format(blob_name))

    end_time = datetime.now()
    print("\n***** Elapsed Time => {0} *****".format(end_time - start_time))
if __name__ == "__main__":
    # Sample run: the first group of names is expected to exist in the
    # container, the second group is expected to be missing.
    blob_names = [
        # Exists
        "eula.1028.txt",
        "eula.1031.txt",
        "eula.1033.txt",
        "eula.1036.txt",
        "eula.1040.txt",
        # Don't exist
        "blob1",
        "blob2",
        "blob3",
        "blob4",
    ]
    check_if_blob_exists("containername", blob_names)
Find below a screenshot of a quick execution test from my laptop in West US (~150 Mbps download, ~3.22 Mbps upload, per Google Speed Test), checking whether 9 blobs exist in an LRS Storage Account that is also in West US.