import json
import threading
import time
import logging
import warnings
from .authority import canonicalize
from .oauth2cli.oidc import decode_part, decode_id_token
from .oauth2cli.oauth2 import Client
logger = logging.getLogger(__name__)
_GRANT_TYPE_BROKER = "broker"
def is_subdict_of(small, big):
    """Return True when every key/value pair of ``small`` also appears in ``big``.

    :param small: Mapping holding the pairs that must be present.
    :param big: Dict to look the pairs up in.
    :return: ``True`` if ``small`` is a sub-dict of ``big`` (an empty
        ``small`` always is), ``False`` otherwise.

    Only the keys of ``small`` are visited, so ``big`` is never copied,
    and keys do not have to be strings.
    """
    missing = object()  # Sentinel, so that a stored None still counts as present
    for key, wanted in small.items():
        found = big.get(key, missing)
        if found is missing:
            return False
        # Same "identical or equal" rule that dict equality applies to values
        if not (found is wanted or found == wanted):
            return False
    return True
def _get_username(id_token_claims):
return id_token_claims.get(
"preferred_username", # AAD
id_token_claims.get("upn")) # ADFS 2019
class TokenCache(object):
    """This is considered as a base class containing minimal cache behavior.

    Although it maintains tokens using unified schema across all MSAL libraries,
    this class does not serialize/persist them.
    See subclass :class:`SerializableTokenCache` for details on serialization.

    Entries live in ``self._cache``, a dict keyed first by credential type and
    then by the string built by the matching function in ``self.key_makers``.
    The methods below take ``self._lock`` (a re-entrant lock) around their
    accesses to that dict.
    """

    class CredentialType:
        ACCESS_TOKEN = "AccessToken"
        REFRESH_TOKEN = "RefreshToken"
        ACCOUNT = "Account"  # Not exactly a credential type, but we put it here
        ID_TOKEN = "IdToken"
        APP_METADATA = "AppMetadata"

    class AuthorityType:
        ADFS = "ADFS"
        MSSTS = "MSSTS"  # MSSTS means AAD v2 for both AAD & MSA

    def __init__(self):
        """Create an empty cache, its lock, and the per-type key makers."""
        self._lock = threading.RLock()
        self._cache = {}
        # Each key maker accepts a whole entry as keyword arguments,
        # picks the fields that form the key and ignores everything else.
        self.key_makers = {
            # Note: We have changed token key format before when ordering scopes;
            # changing token key won't result in cache miss.
            self.CredentialType.REFRESH_TOKEN:
                lambda home_account_id=None, environment=None, client_id=None,
                        target=None, **ignored_payload_from_a_real_token:
                    "-".join([
                        home_account_id or "",
                        environment or "",
                        self.CredentialType.REFRESH_TOKEN,
                        client_id or "",
                        "",  # RT is cross-tenant in AAD
                        target or "",  # raw value could be None if deserialized from other SDK
                    ]).lower(),
            self.CredentialType.ACCESS_TOKEN:
                lambda home_account_id=None, environment=None, client_id=None,
                        realm=None, target=None,
                        # Note: New field(s) can be added here
                        #key_id=None,
                        **ignored_payload_from_a_real_token:
                    "-".join([  # Note: Could use a hash here to shorten key length
                        home_account_id or "",
                        environment or "",
                        self.CredentialType.ACCESS_TOKEN,
                        client_id or "",
                        realm or "",
                        target or "",
                        #key_id or "",  # So ATs of different key_id can coexist
                    ]).lower(),
            self.CredentialType.ID_TOKEN:
                lambda home_account_id=None, environment=None, client_id=None,
                        realm=None, **ignored_payload_from_a_real_token:
                    "-".join([
                        home_account_id or "",
                        environment or "",
                        self.CredentialType.ID_TOKEN,
                        client_id or "",
                        realm or "",
                        ""  # Albeit irrelevant, schema requires an empty scope here
                    ]).lower(),
            self.CredentialType.ACCOUNT:
                lambda home_account_id=None, environment=None, realm=None,
                        **ignored_payload_from_a_real_entry:
                    "-".join([
                        home_account_id or "",
                        environment or "",
                        realm or "",
                    ]).lower(),
            self.CredentialType.APP_METADATA:
                lambda environment=None, client_id=None, **kwargs:
                    "appmetadata-{}-{}".format(environment or "", client_id or ""),
        }

    def _get_access_token(
        self,
        home_account_id, environment, client_id, realm, target,  # Together they form a compound key
        default=None,
    ):  # O(1)
        """Look up one access token entry directly by its compound key.

        :param target: An iterable of scope strings. They are joined with a
            space in the given order, so the caller decides the ordering.
        :param default: Returned when no entry is stored under that key.
        """
        return self._get(
            self.CredentialType.ACCESS_TOKEN,
            self.key_makers[TokenCache.CredentialType.ACCESS_TOKEN](
                home_account_id=home_account_id,
                environment=environment,
                client_id=client_id,
                realm=realm,
                target=" ".join(target),
            ),
            default=default)

    def _get_app_metadata(self, environment, client_id, default=None):  # O(1)
        """Look up the app metadata entry of (environment, client_id) by key."""
        return self._get(
            self.CredentialType.APP_METADATA,
            self.key_makers[TokenCache.CredentialType.APP_METADATA](
                environment=environment,
                client_id=client_id,
            ),
            default=default)

    def _get(self, credential_type, key, default=None):  # O(1)
        """Return the entry stored under (credential_type, key), or ``default``."""
        with self._lock:
            return self._cache.get(credential_type, {}).get(key, default)

    @staticmethod
    def _is_matching(entry: dict, query: dict, target_set: set = None) -> bool:
        """Tell whether ``entry`` satisfies both ``query`` and ``target_set``.

        :param entry: A cache entry.
        :param query: Key/value pairs the entry must contain; None means no constraint.
        :param target_set: Scopes which must all appear in the entry's
            space-separated "target"; a falsy value means no constraint.
        """
        if target_set:
            # "target" may be absent, or stored as None when the entry was
            # deserialized from another SDK; both count as "no scope at all".
            entry_scopes = set((entry.get("target") or "").split())
            if not target_set <= entry_scopes:
                return False
        return is_subdict_of(query or {}, entry)

    def search(self, credential_type, target=None, query=None, *, now=None):  # O(n) generator
        """Returns a generator of matching entries.

        It is O(1) for AT hits, and O(n) for other types.
        Note that it holds a lock during the entire search.

        :param credential_type: One of the :class:`CredentialType` values.
        :param target: Optional iterable of scopes that a match must contain.
        :param query: Optional dict of key/value pairs that a match must contain.
        :param now: Optional timestamp in seconds, used instead of
            ``time.time()`` when deciding which access tokens have expired.

        Access tokens found expired during the O(n) scan are not yielded,
        and they are removed from the cache once the scan has completed.
        """
        target = sorted(target or [])  # Match the order sorted by add()
        # NOTE(review): sorted() always returns a list, so this can never fail
        assert isinstance(target, list), "Invalid parameter type"

        preferred_result = None
        if (credential_type == self.CredentialType.ACCESS_TOKEN
            and isinstance(query, dict)
            and "home_account_id" in query and "environment" in query
            and "client_id" in query and "realm" in query and target
        ):  # Special case for O(1) AT lookup
            preferred_result = self._get_access_token(
                query["home_account_id"], query["environment"],
                query["client_id"], query["realm"], target)
            if preferred_result and self._is_matching(
                preferred_result, query,
                # Needs no target_set here because it is satisfied by dict key
            ):
                yield preferred_result

        target_set = set(target)
        with self._lock:
            # O(n) search. The key is NOT used in search.
            now = int(time.time() if now is None else now)
            expired_access_tokens = [
                # Especially when/if we key ATs by ephemeral fields such as key_id,
                # stale ATs keyed by an old key_id would stay forever.
                # Here we collect them for their removal.
            ]
            for entry in self._cache.get(credential_type, {}).values():
                if (  # Automatically delete expired access tokens
                    credential_type == self.CredentialType.ACCESS_TOKEN
                    and int(entry["expires_on"]) < now
                ):
                    expired_access_tokens.append(entry)  # Can't delete them within current for-loop
                    continue
                if (entry != preferred_result  # Avoid yielding the same entry twice
                    and self._is_matching(entry, query, target_set=target_set)
                ):
                    yield entry
            for at in expired_access_tokens:
                self.remove_at(at)

    def find(self, credential_type, target=None, query=None, *, now=None):
        """Equivalent to list(search(...)).

        Deprecated: it emits a :class:`DeprecationWarning` on every call.
        """
        warnings.warn(
            "Use list(search(...)) instead to explicitly get a list.",
            DeprecationWarning,
            stacklevel=2,  # Attribute the warning to the caller of find()
        )
        return list(self.search(credential_type, target=target, query=query, now=now))

    def add(self, event, now=None):
        """Handle a token obtaining event, and add tokens into cache.

        :param dict event: Describes one token acquisition. This class reads
            its "response" and "data" sub-dicts plus keys such as
            "client_id", "scope", "token_endpoint", "environment" and
            "grant_type".
        :param now: Optional timestamp in seconds to use instead of ``time.time()``.
        """
        # Masking and dumping the whole event is only worth doing
        # when the debug record is actually going to be emitted.
        if logger.isEnabledFor(logging.DEBUG):
            def make_clean_copy(dictionary, sensitive_fields):  # Masks sensitive info
                return {
                    k: "********" if k in sensitive_fields else v
                    for k, v in dictionary.items()
                }
            clean_event = dict(
                event,
                data=make_clean_copy(event.get("data", {}), (
                    "password", "client_secret", "refresh_token", "assertion",
                )),
                response=make_clean_copy(event.get("response", {}), (
                    "id_token_claims",  # Provided by broker
                    "access_token", "refresh_token", "id_token", "username",
                )),
            )
            logger.debug("event=%s", json.dumps(
                # We examined and concluded that this log won't have Log Injection risk,
                # because the event payload is already in JSON so CR/LF will be escaped.
                clean_event,
                indent=4, sort_keys=True,
                default=str,  # assertion is in bytes in Python 3
            ))
        return self.__add(event, now=now)

    def __parse_account(self, response, id_token_claims):
        """Return client_info and home_account_id"""
        if "client_info" in response:  # It happens when client_info and profile are in request
            client_info = json.loads(decode_part(response["client_info"]))
            if "uid" in client_info and "utid" in client_info:
                return client_info, "{uid}.{utid}".format(**client_info)
            # https://github.com/AzureAD/microsoft-authentication-library-for-python/issues/387
        if id_token_claims:  # This would be an end user on ADFS-direct scenario
            sub = id_token_claims["sub"]  # "sub" always exists, per OIDC specs
            return {"uid": sub}, sub
        # client_credentials flow will reach this code path
        return {}, None

    def __add(self, event, now=None):
        """Turn one token event into cache entries and store them.

        Depending on what the response contains, this writes an access token,
        an account, an ID token and a refresh token entry; an app metadata
        entry is always written. All writes go through :func:`modify`.
        """
        # event typically contains: client_id, scope, token_endpoint,
        # response, params, data, grant_type
        environment = realm = None
        if "token_endpoint" in event:
            _, environment, realm = canonicalize(event["token_endpoint"])
        if "environment" in event:  # Always available unless in legacy test cases
            environment = event["environment"]  # Set by application.py
        response = event.get("response", {})
        data = event.get("data", {})
        access_token = response.get("access_token")
        refresh_token = response.get("refresh_token")
        id_token = response.get("id_token")
        id_token_claims = response.get("id_token_claims") or (  # Prefer the claims from broker
            # Only use decode_id_token() when necessary, it contains time-sensitive validation
            decode_id_token(id_token, client_id=event["client_id"]) if id_token else {})
        client_info, home_account_id = self.__parse_account(response, id_token_claims)

        target = ' '.join(sorted(event.get("scope") or []))  # Schema should have required sorting

        with self._lock:
            now = int(time.time() if now is None else now)

            if access_token:
                default_expires_in = (  # https://www.rfc-editor.org/rfc/rfc6749#section-5.1
                    int(response.get("expires_on")) - now  # Some Managed Identity emits this
                ) if response.get("expires_on") else 600
                expires_in = int(  # AADv1-like endpoint returns a string
                    response.get("expires_in", default_expires_in))
                ext_expires_in = int(  # AADv1-like endpoint returns a string
                    response.get("ext_expires_in", expires_in))
                at = {
                    "credential_type": self.CredentialType.ACCESS_TOKEN,
                    "secret": access_token,
                    "home_account_id": home_account_id,
                    "environment": environment,
                    "client_id": event.get("client_id"),
                    "target": target,
                    "realm": realm,
                    "token_type": response.get("token_type", "Bearer"),
                    "cached_at": str(now),  # Schema defines it as a string
                    "expires_on": str(now + expires_in),  # Same here
                    "extended_expires_on": str(now + ext_expires_in)  # Same here
                }
                at.update({k: data[k] for k in data if k in {
                    # Also store extra data which we explicitly allow
                    # So that we won't accidentally store a user's password etc.
                    "key_id",  # It happens in SSH-cert or POP scenario
                }})
                if "refresh_in" in response:
                    # Coerced the same way as expires_in above, so that a
                    # numeric string does not break the addition below.
                    refresh_in = int(response["refresh_in"])
                    at["refresh_on"] = str(now + refresh_in)  # Schema wants a string
                self.modify(self.CredentialType.ACCESS_TOKEN, at, at)

            if client_info and not event.get("skip_account_creation"):
                account = {
                    "home_account_id": home_account_id,
                    "environment": environment,
                    "realm": realm,
                    "local_account_id": event.get(
                        "_account_id",  # Came from mid-tier code path.
                        # Empirically, it is the oid in AAD or cid in MSA.
                        id_token_claims.get("oid", id_token_claims.get("sub"))),
                    "username": _get_username(id_token_claims)
                        or data.get("username")  # Falls back to ROPC username
                        or event.get("username")  # Falls back to Federated ROPC username
                        or "",  # The schema does not like null
                    "authority_type": event.get(
                        "authority_type",  # Honor caller's choice of authority_type
                        self.AuthorityType.ADFS if realm == "adfs"
                        else self.AuthorityType.MSSTS),
                    # "client_info": response.get("client_info"),  # Optional
                }
                grant_types_that_establish_an_account = (
                    _GRANT_TYPE_BROKER, "authorization_code", "password",
                    Client.DEVICE_FLOW["GRANT_TYPE"])
                if event.get("grant_type") in grant_types_that_establish_an_account:
                    account["account_source"] = event["grant_type"]
                self.modify(self.CredentialType.ACCOUNT, account, account)

            if id_token:
                idt = {
                    "credential_type": self.CredentialType.ID_TOKEN,
                    "secret": id_token,
                    "home_account_id": home_account_id,
                    "environment": environment,
                    "realm": realm,
                    "client_id": event.get("client_id"),
                    # "authority": "it is optional",
                }
                self.modify(self.CredentialType.ID_TOKEN, idt, idt)

            if refresh_token:
                rt = {
                    "credential_type": self.CredentialType.REFRESH_TOKEN,
                    "secret": refresh_token,
                    "home_account_id": home_account_id,
                    "environment": environment,
                    "client_id": event.get("client_id"),
                    "target": target,  # Optional per schema though
                    "last_modification_time": str(now),  # Optional. Schema defines it as a string.
                }
                if "foci" in response:
                    rt["family_id"] = response["foci"]
                self.modify(self.CredentialType.REFRESH_TOKEN, rt, rt)

            app_metadata = {
                "client_id": event.get("client_id"),
                "environment": environment,
            }
            if "foci" in response:
                app_metadata["family_id"] = response.get("foci")
            self.modify(self.CredentialType.APP_METADATA, app_metadata, app_metadata)

    def modify(self, credential_type, old_entry, new_key_value_pairs=None):
        """Update or remove the cache entry identified by ``old_entry``.

        Modify the specified old_entry with new_key_value_pairs,
        or remove the old_entry if the new_key_value_pairs is None
        (any falsy value, such as an empty dict, also removes it).

        This helper exists to consolidate all token add/modify/remove behaviors,
        so that the sub-classes will have only one method to work on,
        instead of patching a pair of update_xx() and remove_xx() per type.
        You can monkeypatch self.key_makers to support more types on-the-fly.

        :param credential_type: Selects both the key maker and the bucket.
        :param dict old_entry: The entry whose fields produce the cache key.
        :param dict new_key_value_pairs: Fields to lay over ``old_entry``.
        """
        key = self.key_makers[credential_type](**old_entry)
        with self._lock:
            if new_key_value_pairs:  # Update with them
                entries = self._cache.setdefault(credential_type, {})
                entries[key] = dict(
                    old_entry,  # Do not use entries[key] b/c it might not exist
                    **new_key_value_pairs)
            else:  # Remove old_entry
                self._cache.setdefault(credential_type, {}).pop(key, None)

    def remove_rt(self, rt_item):
        """Remove the given refresh token entry from the cache."""
        assert rt_item.get("credential_type") == self.CredentialType.REFRESH_TOKEN
        return self.modify(self.CredentialType.REFRESH_TOKEN, rt_item)

    def update_rt(self, rt_item, new_rt):
        """Store ``new_rt`` as the secret of the given refresh token entry."""
        assert rt_item.get("credential_type") == self.CredentialType.REFRESH_TOKEN
        return self.modify(self.CredentialType.REFRESH_TOKEN, rt_item, {
            "secret": new_rt,
            "last_modification_time": str(int(time.time())),  # Optional. Schema defines it as a string.
        })

    def remove_at(self, at_item):
        """Remove the given access token entry from the cache."""
        assert at_item.get("credential_type") == self.CredentialType.ACCESS_TOKEN
        return self.modify(self.CredentialType.ACCESS_TOKEN, at_item)

    def remove_idt(self, idt_item):
        """Remove the given ID token entry from the cache."""
        assert idt_item.get("credential_type") == self.CredentialType.ID_TOKEN
        return self.modify(self.CredentialType.ID_TOKEN, idt_item)

    def remove_account(self, account_item):
        """Remove the given account entry from the cache."""
        assert "authority_type" in account_item
        return self.modify(self.CredentialType.ACCOUNT, account_item)
class SerializableTokenCache(TokenCache):
    """A token cache whose content can be exported to, and restored from, a string.

    Use it as a starting point for implementing your own persistence.
    This class does NOT actually persist the cache on disk/db/etc..
    Depending on your need,
    the following simple recipe for file-based, unencrypted persistence may be sufficient::

        import os, atexit, msal
        cache_filename = os.path.join(  # Persist cache into this file
            os.getenv(
                # Automatically wipe out the cache from Linux when user's ssh session ends.
                # See also https://github.com/AzureAD/microsoft-authentication-library-for-python/issues/690
                "XDG_RUNTIME_DIR", ""),
            "my_cache.bin")
        cache = msal.SerializableTokenCache()
        if os.path.exists(cache_filename):
            cache.deserialize(open(cache_filename, "r").read())
        atexit.register(lambda:
            open(cache_filename, "w").write(cache.serialize())
            # Hint: The following optional line persists only when state changed
            if cache.has_state_changed else None
            )
        app = msal.ClientApplication(..., token_cache=cache)
        ...

    Alternatively, you may use a more sophisticated cache persistence library,
    `MSAL Extensions <https://github.com/AzureAD/microsoft-authentication-extensions-for-python>`_,
    which provides token cache persistence with encryption, and more.

    :var bool has_state_changed:
        Indicates whether the cache state in the memory has changed since last
        :func:`~serialize` or :func:`~deserialize` call.
    """

    # Class-level default; add() and modify() set it per instance
    has_state_changed = False

    def add(self, event, **kwargs):
        """Add tokens through the base class, then flag the cache as changed."""
        super().add(event, **kwargs)
        self.has_state_changed = True

    def modify(self, credential_type, old_entry, new_key_value_pairs=None):
        """Update or remove an entry through the base class, then flag the cache as changed."""
        super().modify(credential_type, old_entry, new_key_value_pairs)
        self.has_state_changed = True

    def deserialize(self, state):
        # type: (Optional[str]) -> None
        """Deserialize the cache from a state previously obtained by serialize()

        A falsy ``state`` (``None`` or an empty string) leaves the cache empty.
        """
        with self._lock:
            if state:
                self._cache = json.loads(state)
            else:
                self._cache = {}
            self.has_state_changed = False  # reset

    def serialize(self):
        # type: () -> str
        """Serialize the current cache state into a string."""
        with self._lock:
            # The flag is cleared before dumping, while the lock is still held
            self.has_state_changed = False
            snapshot = json.dumps(self._cache, indent=4)
            return snapshot