Commit 53e91c6

Fixes Docker setup and sessions

1 parent 8540bdf, commit 53e91c6
14 files changed: +111 -57 lines

.devcontainer/Dockerfile (+1 -1)

@@ -1,4 +1,4 @@
-FROM python:3.13-slim
+FROM python:3.12-slim
 
 ENV PIP_DEFAULT_TIMEOUT=100
 ENV PIP_DISABLE_PIP_VERSION_CHECK=on

Dockerfile (+7 -10)

@@ -12,27 +12,24 @@ ENV PYTHONFAULTHANDLER=1
 ENV PYTHONHASHSEED=random
 ENV PYTHONUNBUFFERED=1
 
-USER talkd
-WORKDIR /app
+RUN apt update -y && \
+    apt install -y gcc libpq-dev && \
+    apt-get autoremove -y && \
+    apt-get clean -y
+
+RUN pip install poetry
 
+WORKDIR /app
 COPY poetry.lock pyproject.toml README.md /app/
 COPY pytest.ini /app/src/
 
-USER root
-RUN apt update -y && apt upgrade -y && apt install -y gcc libpq-dev
-RUN pip install -U pip poetry
-
 RUN poetry config virtualenvs.create false && \
     poetry install --only main
 
-USER talkd
-
 COPY /static /app/static
 COPY /etc /app/etc
 COPY /src /app/src
 
-USER root
-
 RUN chmod +x /app/etc/run.sh
 RUN chmod +x /app/etc/run-tests.sh
 

Dockerfile.test (new file, +38)

@@ -0,0 +1,38 @@
+FROM python:3.12.3-slim
+LABEL org.opencontainers.image.source https://github.com/talkdai/dialog
+LABEL org.opencontainers.image.licenses MIT
+
+RUN useradd --user-group --system --create-home --no-log-init talkd
+RUN export PATH="/home/talkd/.local/bin:$PATH"
+
+ENV PIP_DEFAULT_TIMEOUT=100
+ENV PIP_DISABLE_PIP_VERSION_CHECK=on
+ENV PIP_NO_CACHE_DIR=on
+ENV PYTHONFAULTHANDLER=1
+ENV PYTHONHASHSEED=random
+ENV PYTHONUNBUFFERED=1
+
+RUN apt update -y && \
+    apt install -y gcc libpq-dev && \
+    apt-get autoremove -y && \
+    apt-get clean -y
+
+RUN pip install poetry
+
+WORKDIR /app
+COPY poetry.lock pyproject.toml README.md /app/
+COPY pytest.ini /app/src/
+
+RUN poetry config virtualenvs.create false && \
+    poetry install
+
+COPY /static /app/static
+COPY /etc /app/etc
+COPY /src /app/src
+
+RUN chmod +x /app/etc/run.sh
+RUN chmod +x /app/etc/run-tests.sh
+
+WORKDIR /app/src
+
+CMD [ "/app/etc/run.sh" ]

Makefile (+10 -4)

@@ -1,9 +1,6 @@
 lint:
 	poetry run black .
 
-db-up:
-	docker compose up db
-
 run:
 	poetry run uvicorn --app-dir src main:app --reload --host 0.0.0.0 --port 8000 --lifespan on --env-file .env
 
@@ -15,4 +12,13 @@ test-build:
 	docker compose -f docker-compose.test.yml run --rm --build dialog
 
 test:
-	docker compose -f docker-compose.test.yml run --rm dialog
+	docker compose -f docker-compose.test.yml run --rm dialog
+
+run-build:
+	docker compose up --build
+
+run-db:
+	docker compose up db
+
+run-ui:
+	docker compose -f docker-compose-open-webui.yml up

docker-compose.dev-container.yml (+8 -7)

@@ -1,17 +1,17 @@
-version: '3.3'
+version: "3.3"
 services:
   db:
     image: pgvector/pgvector:pg15
     restart: always
     ports:
-      - '5432:5432'
+      - "5432:5432"
     environment:
       POSTGRES_USER: talkdai
       POSTGRES_PASSWORD: talkdai
       POSTGRES_DB: talkdai
     volumes:
-      - ./data/db:/var/lib/postgresql/data
-      - ./psql/db-ext-vector.sql:/docker-entrypoint-initdb.d/db-ext-vector.sql
+      - ./data/db:/var/lib/postgresql/data
+      - ./psql/db-ext-vector.sql:/docker-entrypoint-initdb.d/db-ext-vector.sql
     healthcheck:
       test: ["CMD", "pg_isready", "-d", "talkdai", "-U", "talkdai"]
       interval: 10s
@@ -23,13 +23,14 @@ services:
       dockerfile: Dockerfile
     stdin_open: true
     tty: true
+    entrypoint: ["/bin/bash"]
     volumes:
       - ./:/app
       - ./data/:/app/data/
       - ./.empty:/app/data/db
       - ./static:/app/static
     ports:
-      - '8000:8000'
+      - "8000:8000"
     depends_on:
       db:
         condition: service_healthy
@@ -42,7 +43,7 @@ services:
   openwebui:
     image: ghcr.io/open-webui/open-webui:main
     ports:
-      - '3000:8080'
+      - "3000:8080"
     environment:
       - OPENAI_API_KEYS=FAKE-KEY;
       - OPENAI_API_BASE_URLS=http://dialog:8000/openai;
@@ -56,4 +57,4 @@ services:
         condition: service_started
 
 volumes:
-  open-webui:
+  open-webui:

etc/run-tests.sh (+1 -3)

@@ -1,7 +1,5 @@
 #!/bin/bash
 set -e
 
-cd /app/src/
-poetry install --only dev
 poetry run alembic upgrade head
-exec poetry run pytest --cov=. --cov-config=.coveragerc . -vvv
+exec poetry run pytest --cov=. --cov-config=.coveragerc . -vvv --timeout 10

etc/run.sh (+4 -4)

@@ -1,9 +1,9 @@
 #!/bin/bash
 
-alembic upgrade head
+poetry run alembic upgrade head
 [[ -z "${DIALOG_LOADCSV_CLEARDB}" ]] || CLEARDB_COMMAND=--cleardb
 [[ -z "${DIALOG_LOADCSV_EMBED_COLUMNS}" ]] || EMBED_COLUMNS="--embed-columns ${DIALOG_LOADCSV_EMBED_COLUMNS}"
-python load_csv.py --path ${DIALOG_DATA_PATH} ${CLEARDB_COMMAND} ${EMBED_COLUMNS}
+poetry run python load_csv.py --path ${DIALOG_DATA_PATH} ${CLEARDB_COMMAND} ${EMBED_COLUMNS}
 
 /app/etc/install-plugins.sh
 
@@ -13,7 +13,7 @@ if [ -n "${TEST}" ]; then
 fi
 
 if [ -n "${DEBUG}" ]; then
-    exec uvicorn main:app --host 0.0.0.0 --port ${PORT} --reload
+    exec poetry run uvicorn main:app --host 0.0.0.0 --port ${PORT} --reload
 else
-    exec uvicorn main:app --host 0.0.0.0 --port ${PORT}
+    exec poetry run uvicorn main:app --host 0.0.0.0 --port ${PORT}
 fi

poetry.lock (+15 -3)

Generated file; diff not rendered.

pyproject.toml (+1)

@@ -37,6 +37,7 @@ responses = "^0.25.0"
 pytest-mock = "^3.12.0"
 pytest-alembic = "^0.11.0"
 pytest-cov = "^4.1.0"
+pytest-timeout = "^2.3.1"
 
 [build-system]
 requires = ["poetry-core"]
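
Note: pytest-timeout is what backs the new --timeout 10 flag added to etc/run-tests.sh above, so a hanging test is aborted after ten seconds instead of stalling the test container. The same limit can also be set per test with the plugin's marker; a small illustrative sketch in Python (the test name is made up, not taken from this repository):

import time
import pytest

@pytest.mark.timeout(10)  # provided by pytest-timeout; fails the test after 10 seconds
def test_load_csv_finishes_quickly():
    time.sleep(0.1)  # placeholder for the real work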

src/dialog/llm/agents/default.py (+4 -4)

@@ -25,14 +25,14 @@ def __init__(self, *args, **kwargs):
 
     @property
     def memory(self) -> BaseChatMemory:
-        if self.session_id:
-            return generate_memory_instance(
-                session_id=self.session_id,
+        # Returns the memory instance, if a session_id is not provided, a new session_id is generated
+        # and the memory instance is created.
+        return generate_memory_instance(
+            session_id=self.session_id if self.session_id else str(uuid4()),
             parent_session_id=self.parent_session_id,
             dbsession=self.dbsession,
             database_url=Settings().DATABASE_URL
         )
-        return None
 
     def generate_prompt(self, text):
         self.relevant_contents = get_most_relevant_contents_from_message(
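
Note: this memory change is the "sessions" half of the commit. Previously the property returned None whenever no session_id was set; now it always returns a memory instance, falling back to a freshly generated UUID. A minimal sketch of just the fallback logic (generate_memory_instance itself lives in dialog_lib and is not reproduced here):

from uuid import uuid4

def resolve_session_id(session_id=None):
    # Reuse the caller's session_id when given, otherwise mint a new one,
    # so a memory instance can always be created.
    return session_id if session_id else str(uuid4())

assert resolve_session_id("abc-123") == "abc-123"
assert resolve_session_id() != resolve_session_id()  # a fresh UUID each call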

src/load_csv.py (+8 -8)

@@ -9,7 +9,7 @@
 from dialog_lib.embeddings.generate import generate_embeddings
 from dialog.llm.embeddings import EMBEDDINGS_LLM
 from dialog_lib.db.models import CompanyContent
-from dialog.db import get_session
+from dialog.db import session_scope
 from dialog.settings import Settings
 
 import logging
@@ -21,7 +21,6 @@
 
 logger = logging.getLogger("make_embeddings")
 
-session = next(get_session())
 NECESSARY_COLS = ["category", "subcategory", "question", "content"]
 PK_METADATA_COLS = ["category", "subcategory", "question"]
 
@@ -33,7 +32,7 @@ def _get_csv_cols(path: str) -> List[str]:
         return reader.fieldnames
 
 
-def retrieve_docs_from_vectordb() -> List[Document]:
+def retrieve_docs_from_vectordb(session) -> List[Document]:
     """Retrieve all documents from the vector store."""
     company_contents: List[CompanyContent] = session.query(CompanyContent).all()
     return [
@@ -97,7 +96,7 @@ def load_csv_with_metadata(
 
 
 def load_csv_and_generate_embeddings(
-    path, cleardb=False, embed_columns: Optional[list[str]] = None
+    path, session, cleardb=False, embed_columns: Optional[list[str]] = None
 ):
     """
     Load the knowledge base CSV, get their embeddings and store them into the vector store.
@@ -121,7 +120,7 @@ def load_csv_and_generate_embeddings(
         session.commit()
 
     # Get existing docs
-    docs_in_db: List[Document] = retrieve_docs_from_vectordb()
+    docs_in_db: List[Document] = retrieve_docs_from_vectordb(session)
     logging.info(f"Existing docs: {len(docs_in_db)}")
     existing_pks: List[str] = [
         get_document_pk(doc, PK_METADATA_COLS) for doc in docs_in_db
@@ -160,6 +159,7 @@ def load_csv_and_generate_embeddings(
     parser.add_argument("--embed-columns", default="content")
    args = parser.parse_args()
 
-    load_csv_and_generate_embeddings(
-        args.path, args.cleardb, args.embed_columns.split(",")
-    )
+    with session_scope() as session:
+        load_csv_and_generate_embeddings(
+            args.path, session, args.cleardb, args.embed_columns.split(",")
+        )
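
Note: load_csv.py used to open a module-level session with next(get_session()); the commit threads an explicit session argument through the functions and wraps the CLI entry point in session_scope() from dialog.db. That helper's implementation is not part of this diff; a plausible sketch of the usual SQLAlchemy pattern, with a placeholder connection URL (the real code presumably reads Settings().DATABASE_URL):

from contextlib import contextmanager
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine("postgresql+psycopg2://talkdai:talkdai@localhost:5432/talkdai")  # placeholder URL
Session = sessionmaker(bind=engine)

@contextmanager
def session_scope():
    """Provide a transactional scope: commit on success, roll back on error, always close."""
    session = Session()
    try:
        yield session
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()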

src/tests/conftest.py (+2)

@@ -30,9 +30,11 @@ def dbsession(mocker):
 
     with Session() as session:
         yield session
+        session.rollback()
 
     Base.metadata.drop_all(bind=engine)
 
+
 @pytest.fixture
 def client(dbsession):
     def get_session_override():
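
Note: the added session.rollback() discards anything a test left uncommitted before the schema is dropped, so state cannot leak between tests. The rest of the fixture is not shown in this diff; a minimal sketch of the same pattern against an in-memory SQLite engine (the real fixture presumably builds its engine and Base from the project's models):

import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()  # stand-in for the project's declarative Base
engine = create_engine("sqlite:///:memory:")

@pytest.fixture
def dbsession():
    Base.metadata.create_all(bind=engine)
    Session = sessionmaker(bind=engine)
    with Session() as session:
        yield session
        session.rollback()  # the line this commit adds
    Base.metadata.drop_all(bind=engine)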
