Skip to content

Commit 7e6f683

Browse files
author
florian
committed
add logging and dockerfile
1 parent 3ec5cc2 commit 7e6f683

22 files changed

+357
-143
lines changed

.gitignore

-1
Original file line numberDiff line numberDiff line change
@@ -166,5 +166,4 @@ cython_debug/
166166
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
167167
#.idea/
168168

169-
/data
170169
.env

Dockerfile

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# syntax=docker/dockerfile:1
22

3-
FROM python:3.9-slim-buster
3+
FROM python:3.10-slim-buster
44

5-
ENV POETRY_VERSION=1.4 \
5+
ENV POETRY_VERSION=1.6 \
66
POETRY_VIRTUALENVS_CREATE=false
77

88
# Install poetry
@@ -18,4 +18,6 @@ RUN poetry install --no-interaction --no-ansi --no-root --no-dev
1818
# Copy Python code to the Docker image
1919
COPY pypi_llm /code/pypi_llm/
2020

21+
ENV PYTHONPATH=/code
22+
2123
CMD [ "python", "pypi_llm/foo.py"]

README.md

+10-45
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,20 @@
11
# pypi-llm
22

3-
[![Release](https://img.shields.io/github/v/release/fpgmaas/pypi-llm)](https://img.shields.io/github/v/release/fpgmaas/pypi-llm)
4-
[![Build status](https://img.shields.io/github/actions/workflow/status/fpgmaas/pypi-llm/main.yml?branch=main)](https://github.com/fpgmaas/pypi-llm/actions/workflows/main.yml?query=branch%3Amain)
5-
[![codecov](https://codecov.io/gh/fpgmaas/pypi-llm/branch/main/graph/badge.svg)](https://codecov.io/gh/fpgmaas/pypi-llm)
6-
[![Commit activity](https://img.shields.io/github/commit-activity/m/fpgmaas/pypi-llm)](https://img.shields.io/github/commit-activity/m/fpgmaas/pypi-llm)
7-
[![License](https://img.shields.io/github/license/fpgmaas/pypi-llm)](https://img.shields.io/github/license/fpgmaas/pypi-llm)
3+
https://drive.google.com/file/d/1huR7-VD3AieBRCcQyRX9MWbPLMb_czjq/view?usp=sharing
84

9-
This is a template repository for Python projects that use Poetry for their dependency management.
5+
# setup
106

11-
- **Github repository**: <https://github.com/fpgmaas/pypi-llm/>
12-
- **Documentation** <https://fpgmaas.github.io/pypi-llm/>
13-
14-
## Getting started with your project
15-
16-
First, create a repository on GitHub with the same name as this project, and then run the following commands:
17-
18-
```bash
19-
git init -b main
20-
git add .
21-
git commit -m "init commit"
22-
git remote add origin [email protected]:fpgmaas/pypi-llm.git
23-
git push -u origin main
247
```
25-
26-
Finally, install the environment and the pre-commit hooks with
27-
28-
```bash
29-
make install
8+
docker build -t pypi-llm .
309
```
3110

32-
You are now ready to start development on your project!
33-
The CI/CD pipeline will be triggered when you open a pull request, merge to main, or when you create a new release.
34-
35-
To finalize the set-up for publishing to PyPi or Artifactory, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/publishing/#set-up-for-pypi).
36-
For activating the automatic documentation with MkDocs, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/mkdocs/#enabling-the-documentation-on-github).
37-
To enable the code coverage reports, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/codecov/).
38-
39-
## Releasing a new version
40-
41-
- Create an API Token on [Pypi](https://pypi.org/).
42-
- Add the API Token to your projects secrets with the name `PYPI_TOKEN` by visiting [this page](https://github.com/fpgmaas/pypi-llm/settings/secrets/actions/new).
43-
- Create a [new release](https://github.com/fpgmaas/pypi-llm/releases/new) on Github.
44-
- Create a new tag in the form `*.*.*`.
45-
46-
For more details, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/cicd/#how-to-trigger-a-release).
47-
48-
---
49-
50-
Repository initiated with [fpgmaas/cookiecutter-poetry](https://github.com/fpgmaas/cookiecutter-poetry).
51-
52-
---
11+
```
12+
docker run --rm \
13+
--env-file .env \
14+
-v $(pwd)/data:/code/data \
15+
pypi-llm \
16+
python /code/pypi_llm/scripts/1_download_dataset.py
17+
```
5318

5419
## total
5520

data/.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Ignore everything in this directory
2+
*
3+
# Except this file
4+
!.gitignore

frontend/app/components/InfoBox.tsx

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import React from "react";
2+
3+
/** Props accepted by the InfoBox component. */
interface InfoBoxProps {
  /** When false, the component renders nothing. */
  infoBoxVisible: boolean;
}

/**
 * Static explanatory panel describing how the package search works.
 *
 * Returns `null` while `infoBoxVisible` is false so the parent can render it
 * unconditionally and toggle it through the prop alone.
 */
const InfoBox: React.FC<InfoBoxProps> = ({ infoBoxVisible }) => {
  if (!infoBoxVisible) return null;

  return (
    <div className="w-3/5 bg-white p-6 rounded-lg shadow-lg mt-4">
      <h2 className="text-2xl font-bold mb-2">How does this work?</h2>
      <p className="text-gray-700">
        {/* Quotes are written as &quot; because raw double quotes in JSX text
            trip the react/no-unescaped-entities lint rule. */}
        This application allows you to search for Python packages on PyPi using
        natural language. An example query would be &quot;a package that creates
        plots and beautiful visualizations&quot;.
      </p>
      <br />
      <p className="text-gray-700">
        Once you click search, your query will be matched against the summary
        and the first part of the description of all PyPi packages with more
        than 50 weekly downloads. The results are then scored based on their
        similarity and their number of weekly downloads, and the thirty best
        results are displayed in the table below.
      </p>
    </div>
  );
};
29+
30+
export default InfoBox;

frontend/app/page.tsx

+29-58
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,28 @@
11
"use client";
22

33
import { useState } from "react";
4-
import axios from "axios";
5-
import SearchResultsTable from "../components/SearchResultsTable";
4+
import { handleSearch, sortResults } from "./utils/search";
5+
import SearchResultsTable from "./components/SearchResultsTable";
6+
import InfoBox from "./components/InfoBox";
67
import { ClipLoader } from "react-spinners";
78

8-
export default function Home() {
9-
const [text, setText] = useState("");
10-
const [results, setResults] = useState([]);
11-
const [sortField, setSortField] = useState("weekly_downloads");
12-
const [sortDirection, setSortDirection] = useState("desc");
13-
const [loading, setLoading] = useState(false);
14-
const [error, setError] = useState("");
15-
const [infoBoxVisible, setInfoBoxVisible] = useState(false);
16-
17-
const handleSearch = async () => {
18-
setLoading(true);
19-
setError("");
20-
try {
21-
const response = await axios.post(
22-
"http://localhost:8000/search",
23-
{
24-
query: text,
25-
},
26-
{
27-
headers: {
28-
"Content-Type": "application/json",
29-
},
30-
},
31-
);
32-
const fetchedResults = response.data.matches;
33-
setResults(sortResults(fetchedResults, sortField, sortDirection));
34-
} catch (error) {
35-
setError("Error fetching search results.");
36-
console.error("Error fetching search results:", error);
37-
} finally {
38-
setLoading(false);
39-
}
40-
};
9+
interface Match {
10+
name: string;
11+
similarity: number;
12+
weekly_downloads: number;
13+
summary: string;
14+
}
4115

42-
const sortResults = (data, field, direction) => {
43-
return [...data].sort((a, b) => {
44-
if (a[field] < b[field]) return direction === "asc" ? -1 : 1;
45-
if (a[field] > b[field]) return direction === "asc" ? 1 : -1;
46-
return 0;
47-
});
48-
};
16+
export default function Home() {
17+
const [text, setText] = useState<string>("");
18+
const [results, setResults] = useState<Match[]>([]);
19+
const [sortField, setSortField] = useState<string>("similarity");
20+
const [sortDirection, setSortDirection] = useState<string>("desc");
21+
const [loading, setLoading] = useState<boolean>(false);
22+
const [error, setError] = useState<string>("");
23+
const [infoBoxVisible, setInfoBoxVisible] = useState<boolean>(false);
4924

50-
const handleSort = (field) => {
25+
const handleSort = (field: string) => {
5126
const direction =
5227
sortField === field && sortDirection === "asc" ? "desc" : "asc";
5328
setSortField(field);
@@ -72,7 +47,16 @@ export default function Home() {
7247
></textarea>
7348
<button
7449
className="w-[250px] p-2 border rounded bg-blue-500 text-white hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-500"
75-
onClick={handleSearch}
50+
onClick={() =>
51+
handleSearch(
52+
text,
53+
sortField,
54+
sortDirection,
55+
setResults,
56+
setLoading,
57+
setError,
58+
)
59+
}
7660
>
7761
Search
7862
</button>
@@ -91,20 +75,7 @@ export default function Home() {
9175
</button>
9276
</div>
9377

94-
{infoBoxVisible && (
95-
<div className="w-3/5 bg-white p-6 rounded-lg shadow-lg mt-4">
96-
<h2 className="text-2xl font-bold mb-2">How does this work?</h2>
97-
<p className="text-gray-700">
98-
This application allows you to search for Python packages on PyPi
99-
using natural language. So an example query would be "a package that
100-
creates plots and beautiful visualizations". Once you click search,
101-
your query will be matched against the summary and the first part of
102-
the description of all PyPi packages with more than 50 weekly
103-
downloads, and the 50 most similar results will be displayed in a
104-
table below.
105-
</p>
106-
</div>
107-
)}
78+
<InfoBox infoBoxVisible={infoBoxVisible} />
10879

10980
{results.length > 0 && (
11081
<div className="w-full flex justify-center mt-6">

frontend/app/utils/search.ts

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import axios from "axios";
2+
3+
interface Match {
4+
name: string;
5+
similarity: number;
6+
weekly_downloads: number;
7+
summary: string;
8+
}
9+
10+
/**
 * Send the query to the search API and publish the sorted matches through
 * the supplied React state setters.
 *
 * @param query - Free-text search query sent as the `query` field of the body.
 * @param sortField - Field of each match to sort the fetched results by.
 * @param sortDirection - `"asc"` for ascending; any other value sorts descending.
 * @param setResults - Receives the sorted matches on success.
 * @param setLoading - Set to `true` before the request and `false` afterwards.
 * @param setError - Cleared before the request; set to a message on failure.
 *
 * Errors are not re-thrown: they are logged to the console and reported
 * through `setError`.
 */
export const handleSearch = async (
  query: string,
  sortField: string,
  sortDirection: string,
  setResults: React.Dispatch<React.SetStateAction<Match[]>>,
  setLoading: React.Dispatch<React.SetStateAction<boolean>>,
  setError: React.Dispatch<React.SetStateAction<string>>,
) => {
  setLoading(true);
  setError("");
  try {
    // The trailing slash matches the API route ("/search/") exactly, which
    // avoids an extra redirect round-trip on every search.
    const response = await axios.post<{ matches: Match[] }>(
      "http://localhost:8000/search/",
      { query },
      {
        headers: {
          "Content-Type": "application/json",
        },
      },
    );
    const fetchedResults: Match[] = response.data.matches;
    setResults(sortResults(fetchedResults, sortField, sortDirection));
  } catch (error) {
    setError("Error fetching search results.");
    console.error("Error fetching search results:", error);
  } finally {
    // Always drop the loading flag, whether the request succeeded or failed.
    setLoading(false);
  }
};
41+
42+
/**
 * Return a sorted copy of `data`; the input array is left untouched.
 *
 * @param data - Matches to sort.
 * @param field - Name of the `Match` property to compare on.
 * @param direction - `"asc"` for ascending; any other value sorts descending.
 * @returns A new array ordered by `field` in the requested direction.
 */
export const sortResults = (
  data: Match[],
  field: string,
  direction: string,
): Match[] => {
  // The public signature keeps accepting a plain string; narrowing it here
  // lets the property access type-check under `strict` (no implicit any).
  const key = field as keyof Match;
  // Computed once instead of on every comparison.
  const sign = direction === "asc" ? 1 : -1;

  return [...data].sort((a, b) => {
    if (a[key] < b[key]) return -sign;
    if (a[key] > b[key]) return sign;
    return 0;
  });
};

poetry.lock

+34-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pypi_llm/api/main.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import logging
2+
13
import polars as pl
24
from dotenv import load_dotenv
35
from fastapi import FastAPI
@@ -6,9 +8,12 @@
68
from sentence_transformers import SentenceTransformer
79

810
from pypi_llm.config import Config
11+
from pypi_llm.utils.logging import setup_logging
912
from pypi_llm.utils.score_calculator import calculate_score
1013
from pypi_llm.vector_database import VectorDatabaseInterface
1114

15+
setup_logging()
16+
1217
app = FastAPI()
1318

1419
load_dotenv()
@@ -55,12 +60,20 @@ class SearchResponse(BaseModel):
5560

5661
@app.post("/search/", response_model=SearchResponse)
async def search(query: QueryModel) -> SearchResponse:
    """
    Search for the packages whose summary and description are most similar to the query.

    The vector database is asked for the ``top_k * 2`` most similar packages. Those
    candidates are joined with the package data on ``name``, a weighted score based
    on the similarity and weekly downloads is calculated, and the ``top_k`` packages
    with the highest score are returned.
    """
    # Pass the query as a lazy %-style argument so the message is only
    # formatted when the record is actually emitted.
    logging.info("Searching for similar projects. Query: '%s'", query.query)
    # Fetch twice as many candidates as requested so that re-ranking by score
    # still has enough packages to pick the final top_k from.
    df_matches = vector_database_interface.find_similar(query.query, top_k=query.top_k * 2)
    df_matches = df_matches.join(df, how="left", on="name")

    logging.info("Found similar projects. Calculating the weighted scores and filtering...")
    df_matches = calculate_score(df_matches)
    df_matches = df_matches.sort("score", descending=True)
    df_matches = df_matches.head(query.top_k)

    logging.info("Returning the results...")
    return SearchResponse(matches=df_matches.to_dicts())

0 commit comments

Comments
 (0)