Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Authenticators Factory #794

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
69 changes: 69 additions & 0 deletions core/cat/factory/authorizator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from os import getenv
from typing import Type
from cat.log import log
from pydantic import BaseModel, ConfigDict
from cat.factory.custom_authorizator import BaseAuth, AuthorizatorNoAuth, AuthorizatorApiKey
from cat.mad_hatter.mad_hatter import MadHatter

class AuthorizatorSettings(BaseModel):
    # Base class for authorizator configuration models. Concrete
    # configurations bind `_pyclass` to the BaseAuth subclass they build.
    _pyclass: Type[BaseAuth] = None

    @classmethod
    def get_authorizator_from_config(cls, config):
        """Instantiate the authorizator bound to this configuration class.

        Parameters
        ----------
        config : dict
            Keyword arguments forwarded to the authorizator constructor.

        Returns
        -------
        BaseAuth
            A new instance of the class bound to `_pyclass`.

        Raises
        ------
        Exception
            If `_pyclass` is unset or is not a subclass of `BaseAuth`.
        """
        # The bound class is read from `_pyclass.default` (presumably because
        # pydantic wraps underscore-prefixed attributes -- TODO confirm).
        # Using getattr keeps a plain `None` from raising AttributeError.
        pyclass = getattr(cls._pyclass, "default", None)

        # Check `isinstance(..., type)` first: issubclass() raises TypeError
        # on None or any other non-class, which would hide the error below.
        if not (isinstance(pyclass, type) and issubclass(pyclass, BaseAuth)):
            raise Exception(
                "Authorizator configuration class has self._pyclass==None. Should be a valid Authorizator class"
            )
        return pyclass(**config)

class AuthorizatorNoAuthConfig(AuthorizatorSettings):
    # Configuration for the "no authorization" strategy: it declares no
    # fields and is bound to AuthorizatorNoAuth.
    model_config = ConfigDict(
        json_schema_extra=dict(
            humanReadableName="No Authorizator",
            description="No authorizator is used. All requests are allowed.",
            link="",
        )
    )

    _pyclass: Type = AuthorizatorNoAuth

class AuthorizatorApiKeyConfig(AuthorizatorSettings):
    # Configuration for the API-key strategy, bound to AuthorizatorApiKey.

    # Key forwarded to the AuthorizatorApiKey constructor.
    api_key: str
    _pyclass: Type = AuthorizatorApiKey

    model_config = ConfigDict(
        json_schema_extra={
            "humanReadableName": "API Key Authorizator",
            # Fixed typo in the user-facing text ("Authoriza" -> "Authorize").
            "description": "Authorize requests based on API key",
            "link": "",
        }
    )

def get_allowed_authorizator_strategies():
    """Return the authorizator configuration classes currently allowed.

    The built-in configurations are passed through the
    `factory_allowed_authorizators` hook, and whatever the hook execution
    returns is handed back to the caller.
    """
    builtin_configs = [AuthorizatorNoAuthConfig, AuthorizatorApiKeyConfig]

    return MadHatter().execute_hook(
        "factory_allowed_authorizators", builtin_configs, cat=None
    )

def get_authorizators_schemas():
    """Build the JSON schemas of all allowed authorizator configurations.

    Returns
    -------
    dict
        Maps each schema's ``title`` to the schema produced by
        ``model_json_schema()``, with the title also stored under
        ``"authorizatorName"``.
    """
    AUTHORIZATOR_SCHEMAS = {}
    for config_class in get_allowed_authorizator_strategies():
        schema = config_class.model_json_schema()
        name = schema["title"]
        schema["authorizatorName"] = name
        # The key was originally misspelled; keep the old spelling as well so
        # any consumer already reading it keeps working.
        schema["auhrizatorName"] = name
        AUTHORIZATOR_SCHEMAS[name] = schema

    return AUTHORIZATOR_SCHEMAS

def get_authorizator_from_name(name):
    """Look up an allowed authorizator configuration class by class name.

    Returns the first allowed class whose ``__name__`` equals `name`, or
    ``None`` when no allowed class matches.
    """
    candidates = get_allowed_authorizator_strategies()
    return next(
        (candidate for candidate in candidates if candidate.__name__ == name),
        None,
    )
65 changes: 65 additions & 0 deletions core/cat/factory/custom_authorizator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from os import getenv
from fastapi import (
WebSocket,
Request,
HTTPException
)

from cat.log import log

class BaseAuth():
    """Default authorizator: HTTP is guarded by the master key, websocket is open.

    The master key is read from the ``API_KEY`` environment variable when the
    instance is created.
    """

    def __init__(self):
        self.master_key = getenv("API_KEY")

    def is_master_key(self, request):
        """Tell whether `request` carries the master key.

        Returns ``True`` when no master key is configured; otherwise compares
        the ``access_token`` header with the configured key.
        """
        # Fixed: the original compared the bound method `self.is_master_key`
        # with None, which is never true, so this branch was unreachable.
        if self.master_key is None:
            return True
        return request.headers.get("access_token") == self.master_key

    def is_http_allowed(self, request):
        """Allow an HTTP request only when it passes the master-key check."""
        return self.is_master_key(request)

    def is_ws_allowed(self, websocket):
        """Allow every websocket connection."""
        return True

class AuthorizatorNoAuth(BaseAuth):
    """Authorizator that lets every request and connection through."""

    def __init__(self):
        """Skip the parent initializer on purpose: no attribute is set."""

    def is_master_key(self, request):
        """Treat every request as if it carried the master key."""
        return True

    def is_http_allowed(self, request: Request):
        """Accept any HTTP request."""
        return True

    def is_ws_allowed(self, websocket: WebSocket):
        """Accept any websocket connection."""
        return True

class AuthorizatorApiKey(BaseAuth):
    """Authorizator that requires a fixed API key in the ``Authorization`` header."""

    def __init__(self, api_key):
        self.api_key = api_key
        super().__init__()

    def _require_api_key(self, connection):
        """Return True if `connection` sends the configured key, else raise 403."""
        if connection.headers.get("Authorization") != self.api_key:
            raise HTTPException(
                status_code=403,
                detail={"error": "Invalid API Key"}
            )
        return True

    def is_master_key(self, request):
        """Run the parent master-key check, refusing when no master key is set.

        Raises
        ------
        HTTPException
            Status ``403`` when no master key is configured.
        """
        # Identity check instead of `== None` (PEP 8).
        if self.master_key is None:
            raise HTTPException(
                status_code=403,
                detail={"error": "Master key is not set"}
            )
        return super().is_master_key(request)

    def is_http_allowed(self, request: Request):
        """Allow the HTTP request when its ``Authorization`` header matches.

        Raises
        ------
        HTTPException
            Status ``403`` when the header does not equal the API key.
        """
        return self._require_api_key(request)

    def is_ws_allowed(self, websocket: WebSocket):
        """Allow the websocket when its ``Authorization`` header matches.

        Raises
        ------
        HTTPException
            Status ``403`` when the header does not equal the API key.
        """
        return self._require_api_key(websocket)
2 changes: 1 addition & 1 deletion core/cat/factory/embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class EmbedderSettings(BaseModel):
# instantiate an Embedder from configuration
@classmethod
def get_embedder_from_config(cls, config):
if cls._pyclass is None:
if cls._pyclass is None:
raise Exception(
"Embedder configuration class has self._pyclass==None. Should be a valid Embedder class"
)
Expand Down
43 changes: 35 additions & 8 deletions core/cat/headers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
import os
import fnmatch

from fastapi import Request
from typing import Annotated
from cat.log import log

from fastapi import (
WebSocket,
Request,
)
from fastapi import Security, HTTPException
from fastapi.security.api_key import APIKeyHeader

Expand All @@ -18,8 +24,32 @@

api_key_header = APIKeyHeader(name="access_token", auto_error=False)

def ws_auth(
websocket: WebSocket,
) -> None | str:
"""Authenticate endpoint.

Check the provided key is available in API keys list.
Copy link
Contributor

@sambarza sambarza Apr 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a note about the doc: `check_ws_with_authorizator` doesn't know how the authenticator will perform the authorization check, do you agree?


Parameters
----------
request : Request
HTTP request.

Returns
-------
api_key : str | None
Returns the valid key if set in the `.env`, otherwise return None.

Raises
------
HTTPException
Error with status code `403` if the provided key is not valid.

def check_api_key(request: Request, api_key: str = Security(api_key_header)) -> None | str:
"""
return websocket.app.state.ccat.authorizator.is_ws_allowed(websocket)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case you still need to check the master_key as in the http request, otherwise the admin panel will break if the dev implements a custom rule in is_allowed_ws

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not so sure about this; let me expand a bit on my reasoning.
If the idea is to keep the current behavior backward compatible, then with this solution the websocket remains open.
So, when an API_KEY is set, every HTTP endpoint will be protected but the websocket will not.
What do you think?
I'm also involving @zAlweNy26, who knows how the admin panel works.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMHO the websocket should remain open by default, also because in a browser environment it's not possible to set headers on a WebSocket instance.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't agree with keeping the websocket chat open; this is a terrible security issue. If a dev wants to implement a custom security authorizer for the websocket connection, it should not be public, but the admin still needs to work.
To allow that, we always have to somehow pass the API key from the admin panel.

In the default authorizer "BaseAuth", the check in is_ws_allowed returns true, so by default the old admin should work until the dev implements an authorizer with custom is_ws_allowed logic.

To avoid backward-compatibility problems, we need to extend the admin panel to somehow pass the API key in the websocket connection.

Little recap:
with the current websocket check implementation -> authorizator.is_ws_allowed(websocket)
using BaseAuth:
-> old admin panel will work because is_ws_allowed returns true (✅) (no backward-compatibility problem)
-> direct websocket connection should work without an API key because is_ws_allowed returns true (✅)
using CustomAuth with custom is_ws_allowed logic:
-> admin panel should not work because the admin panel doesn't know the authorization logic (❌)
-> direct websocket connection should work by implementing the custom dev authorizer logic (✅)

implementing this websocket check instead -> authorizator.is_ws_master_key(websocket) or authorizator.is_ws_allowed(websocket)
using BaseAuth:
-> old admin panel should still work because is_ws_allowed returns true (✅) (no backward-compatibility problem)
-> direct websocket connection should work both with and without an API key (✅)
using CustomAuth with custom is_ws_allowed logic:
-> old admin panel should not work (backward-compatibility problem -> we need to pass the API key) (❌)
-> direct websocket connection should work by implementing the custom dev authorizer logic (✅)

what do you think? @giovannialbero1992 @zAlweNy26

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we are overcomplicating because two things happen:

  • the Cat is designed as a machine to machine microservice
  • the admin is not machine to machine

We currently understand what is going on but this could be really hard to get for newbies.
As a long term vision, I'd love the Cat to be used standalone, and also as a microservice.
Most of all, it has to be all around easy to use :)

I take it as my own fault, for insisting so much on both having no users, and having the admin included in core.
I take it also on my shoulders to merge this PR and directly adding the following (help needed only on the admin):

Admin user

There will be a user system in core, and as a start it will only include the admin user.
Credentials for the admin will be changeable via two env variables:
CCAT_ADMIN_USER // defaults to admin
CCAT_ADMIN_PASSWORD // defaults to admin
This is a rough start for a full-fledged user system that can evolve without breaking the admin panel.

When admin panel opens, it can do as follows:

  • search for a JWT in localStorage, if not
  • shows a username / password modal
  • sends credentials to the core and receives a temp JWT

JWT is used for:

  • http requests via Authorization header
  • ws communication via a "token" key included in each message (or only on connection via URL parameter)

Machine 2 machine

As it is now, API_KEY will cover the http endpoints and websocket stays open.
We could easily add a WS_API_KEY to also secure websockets.

In M2M communication, as far as I know, there is no need for a JWT and keys can be used directly.
Nothing prevents using the user/password credential system for M2M as well.

Summary

  • Browsers are authenticated via credentials and JWT
  • M2M is authenticated both like a browser or simply via static keys.
  • Auth can be customized at will as we already designed, and the admin panel is saved.

Custom auth stays as @giovannialbero1992 implemented it, with minor modifications.
I'm going all in, tell me if you see something I'm not seeing.


def http_auth(request: Request) -> None | str:
"""Authenticate endpoint.

Check the provided key is available in API keys list.
Expand All @@ -42,16 +72,13 @@ def check_api_key(request: Request, api_key: str = Security(api_key_header)) ->
Error with status code `403` if the provided key is not valid.

"""
if not API_KEY:
return None
if fnmatch.fnmatch(request.url.path, "/admin*"):
authorizator = request.app.state.ccat.authorizator
if authorizator.is_master_key(request) or authorizator.is_http_allowed(request):
return None
if api_key in API_KEY:
return api_key
else:
raise HTTPException(
status_code=403,
detail={"error": "Invalid API Key"}
detail={"error": "Invalid Credentials"}
)


Expand Down
37 changes: 36 additions & 1 deletion core/cat/looking_glass/cheshire_cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
from cat.db import crud
from cat.factory.custom_llm import CustomOpenAI
from cat.factory.embedder import get_embedder_from_name
from cat.factory.authorizator import AuthorizatorSettings, get_authorizator_from_name
import cat.factory.embedder as embedders
import cat.factory.authorizator as authorizators
from cat.factory.llm import LLMDefaultConfig
from cat.factory.llm import get_llm_from_name
from cat.looking_glass.agent_manager import AgentManager
Expand All @@ -22,7 +24,7 @@
from cat.memory.long_term_memory import LongTermMemory
from cat.rabbit_hole import RabbitHole
from cat.utils import singleton

from fastapi import Request

class Procedure(Protocol):
name: str
Expand Down Expand Up @@ -65,6 +67,9 @@ def __init__(self):
# load LLM and embedder
self.load_natural_language()

# load Authorizator
self.load_authorizator()

# Load memories (vector collections and working_memory)
self.load_memory()

Expand Down Expand Up @@ -219,6 +224,35 @@ def load_language_embedder(self) -> embedders.EmbedderSettings:

return embedder

def load_authorizator(self):
    """Load the authorizator selected in the settings.

    Reads the ``authorizator_selected`` setting, resolves the matching
    configuration class by name and instantiates the authorizator from the
    stored configuration. Falls back to the no-auth authorizator when
    nothing is selected or when instantiation raises ``AttributeError``
    (for example when the selected name is not an allowed class). The result
    is stored in ``self.authorizator``.
    """
    authorizator = None
    selected_authorizator = crud.get_setting_by_name(name="authorizator_selected")

    if selected_authorizator is not None:
        # get Authorizator factory class
        selected_authorizator_class = selected_authorizator["value"]["name"]
        FactoryClass = get_authorizator_from_name(selected_authorizator_class)

        # obtain configuration and instantiate Authorizator
        selected_authorizator_config = crud.get_setting_by_name(
            name=selected_authorizator_class
        )
        try:
            authorizator = FactoryClass.get_authorizator_from_config(
                selected_authorizator_config["value"]
            )
            # Fixed: do not log the authorizator's API key. It is a secret,
            # and reading `.api_key` raised AttributeError for authorizators
            # without that attribute, forcing the fallback below.
        except AttributeError:
            import traceback

            traceback.print_exc()

    if authorizator is None:
        authorizator = authorizators.AuthorizatorNoAuthConfig.get_authorizator_from_config({})

    self.authorizator = authorizator

def load_memory(self):
"""Load LongTerMemory and WorkingMemory."""
# Memory
Expand Down Expand Up @@ -351,3 +385,4 @@ def llm(self, prompt, *args, **kwargs) -> str:
# Check if self._llm is a chat model and call it as a completion model
if isinstance(self._llm, BaseChatModel):
return self._llm.call_as_llm(prompt)

16 changes: 16 additions & 0 deletions core/cat/mad_hatter/core_plugin/hooks/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,19 @@ def factory_allowed_embedders(allowed, cat) -> List:
list of allowed embedders
"""
return allowed

@hook(priority=0)
def factory_allowed_authorizators(allowed, cat) -> List:
    """Hook to customize the list of supported authorizators.

    This default implementation hands the list back untouched.

    Parameters
    ----------
    allowed : List of AuthorizatorSettings classes
        Authorizator configuration classes allowed so far.
    cat
        Not used by this default implementation.

    Returns
    -------
    supported : List of AuthorizatorSettings classes
        The list of allowed authorizator configuration classes.
    """
    return allowed
21 changes: 11 additions & 10 deletions core/cat/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
from fastapi.middleware.cors import CORSMiddleware

from cat.log import log
from cat.routes import base, settings, llm, embedder, memory, plugins, upload, websocket
from cat.routes import base, settings, llm, embedder, memory, plugins, upload, websocket, authorizator
from cat.routes.static import public, admin, static
from cat.headers import check_api_key
from cat.headers import http_auth, ws_auth
from cat.routes.openapi import get_openapi_configuration_function
from cat.looking_glass.cheshire_cat import CheshireCat

Expand Down Expand Up @@ -62,14 +62,15 @@ def custom_generate_unique_id(route: APIRoute):
)

# Add routers to the middleware stack.
cheshire_cat_api.include_router(base.router, tags=["Status"], dependencies=[Depends(check_api_key)])
cheshire_cat_api.include_router(settings.router, tags=["Settings"], prefix="/settings", dependencies=[Depends(check_api_key)])
cheshire_cat_api.include_router(llm.router, tags=["Large Language Model"], prefix="/llm", dependencies=[Depends(check_api_key)])
cheshire_cat_api.include_router(embedder.router, tags=["Embedder"], prefix="/embedder", dependencies=[Depends(check_api_key)])
cheshire_cat_api.include_router(plugins.router, tags=["Plugins"], prefix="/plugins", dependencies=[Depends(check_api_key)])
cheshire_cat_api.include_router(memory.router, tags=["Memory"], prefix="/memory", dependencies=[Depends(check_api_key)])
cheshire_cat_api.include_router(upload.router, tags=["Rabbit Hole"], prefix="/rabbithole", dependencies=[Depends(check_api_key)])
cheshire_cat_api.include_router(websocket.router, tags=["WebSocket"])
cheshire_cat_api.include_router(base.router, tags=["Status"], dependencies=[Depends(http_auth)])
cheshire_cat_api.include_router(settings.router, tags=["Settings"], prefix="/settings", dependencies=[Depends(http_auth)])
cheshire_cat_api.include_router(llm.router, tags=["Large Language Model"], prefix="/llm", dependencies=[Depends(http_auth)])
cheshire_cat_api.include_router(embedder.router, tags=["Embedder"], prefix="/embedder", dependencies=[Depends(http_auth)])
cheshire_cat_api.include_router(plugins.router, tags=["Plugins"], prefix="/plugins", dependencies=[Depends(http_auth)])
cheshire_cat_api.include_router(memory.router, tags=["Memory"], prefix="/memory", dependencies=[Depends(http_auth)])
cheshire_cat_api.include_router(upload.router, tags=["Rabbit Hole"], prefix="/rabbithole", dependencies=[Depends(http_auth)])
cheshire_cat_api.include_router(authorizator.router, tags=["Authorizator"], prefix="/authorizator", dependencies=[Depends(http_auth)])
cheshire_cat_api.include_router(websocket.router, tags=["Websocket"], dependencies=[Depends(ws_auth)])

# mount static files
# this cannot be done via fastapi.APIrouter:
Expand Down