Skip to content

Commit 50a958a

Browse files
Merge pull request #144 from bors-ltd/video_date
also store and export video (upload) date
2 parents 6861c0c + 32c6572 commit 50a958a

File tree

5 files changed

+46
-19
lines changed

5 files changed

+46
-19
lines changed

Diff for: yt_fts/db_utils.py

+15-5
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
from rich.console import Console
55
from rich.table import Table
66

7-
from .utils import show_message
7+
from .utils import show_message, get_date
88
from .config import get_db_path
99

1010

@@ -25,7 +25,8 @@ def make_db(db_path):
2525
"video_id": str,
2626
"video_title": str,
2727
"video_url": str,
28-
"channel_id": str
28+
"channel_id": str,
29+
"video_date": str,
2930
},
3031
pk="video_id",
3132
not_null={"video_title", "video_url"},
@@ -78,16 +79,16 @@ def add_channel_info(channel_id, channel_name, channel_url):
7879
})
7980

8081

81-
def add_video(channel_id, video_id, video_title, video_url):
82+
def add_video(channel_id, video_id, video_title, video_url, video_date):
8283

8384
conn = sqlite3.connect(get_db_path())
8485
cur = conn.cursor()
8586
existing_video = cur.execute("SELECT * FROM Videos WHERE video_id = ?",
8687
(video_id,)).fetchone()
8788

8889
if existing_video is None:
89-
cur.execute("INSERT INTO Videos (video_id, video_title, video_url, channel_id) VALUES (?, ?, ?, ?)",
90-
(video_id, video_title, video_url, channel_id))
90+
cur.execute("INSERT INTO Videos (video_id, video_title, video_url, video_date, channel_id) VALUES (?, ?, ?, ?, ?)",
91+
(video_id, video_title, video_url, video_date, channel_id))
9192
conn.commit()
9293

9394
else:
@@ -144,6 +145,15 @@ def get_title_from_db(video_id):
144145
return db.execute(f"SELECT video_title FROM Videos WHERE video_id = ?", [video_id]).fetchone()[0]
145146

146147

148+
def get_metadata_from_db(video_id):
149+
150+
db = Database(get_db_path())
151+
152+
metadata = db.execute_returning_dicts(f"SELECT * FROM Videos WHERE video_id = ?", [video_id])[0]
153+
metadata["video_date"] = get_date(metadata["video_date"])
154+
return metadata
155+
156+
147157
def get_channel_name_from_id(channel_id):
148158

149159
db = Database(get_db_path())

Diff for: yt_fts/download.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
21
import tempfile
32
import subprocess, re, os, sqlite3, json
43

@@ -8,7 +7,7 @@
87

98
from .config import get_db_path
109
from .db_utils import add_video
11-
from .utils import parse_vtt
10+
from .utils import parse_vtt, get_date
1211
from urllib.parse import urlparse
1312

1413
from rich.progress import track
@@ -215,9 +214,10 @@ def vtt_to_db(dir_path):
215214
vid_json = json.load(f)
216215

217216
vid_title = vid_json['title']
217+
vid_date = get_date(vid_json['upload_date'])
218218
channel_id = vid_json['channel_id']
219219

220-
add_video(channel_id, vid_id, vid_title, vid_url)
220+
add_video(channel_id, vid_id, vid_title, vid_url, vid_date)
221221

222222
vtt_json = parse_vtt(vtt)
223223

Diff for: yt_fts/export.py

+11-4
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
from .db_utils import (
66
search_channel, search_video, search_all,
7-
get_channel_name_from_video_id, get_title_from_db
7+
get_channel_name_from_video_id, get_metadata_from_db,
88
)
99

1010
from .utils import time_to_secs, show_message
@@ -35,17 +35,24 @@ def export_fts(text, scope, channel_id=None, video_id=None):
3535

3636
with open(file_name, 'w', newline='') as csvfile:
3737
writer = csv.writer(csvfile)
38-
writer.writerow(['Channel Name','Video Title', 'Quote', 'Time Stamp', 'Link'])
38+
writer.writerow(['Channel Name','Video Title', 'Date', 'Quote', 'Time Stamp', 'Link'])
3939

4040
for quote in res:
4141
video_id = quote["video_id"]
4242
channel_name = get_channel_name_from_video_id(video_id)
43-
video_title = get_title_from_db(video_id)
43+
metadata = get_metadata_from_db(video_id)
4444
time_stamp = quote["start_time"]
4545
subs = quote["text"]
4646
time = time_to_secs(time_stamp)
4747

48-
writer.writerow([channel_name,video_title, subs.strip(), time_stamp, f"https://youtu.be/{video_id}?t={time}"])
48+
writer.writerow([
49+
channel_name,
50+
metadata['video_title'],
51+
metadata['video_date'],
52+
subs.strip(),
53+
time_stamp,
54+
f"https://youtu.be/{video_id}?t={time}"
55+
])
4956

5057
console = Console()
5158

Diff for: yt_fts/search.py

+9-7
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ def print_fts_res(res, query):
5555
quote_match["channel_name"] = get_channel_name_from_video_id(video_id)
5656
channel_names.append(quote_match["channel_name"])
5757

58-
quote_match["video_title"] = get_title_from_db(video_id)
58+
quote_match["metadata"] = get_metadata_from_db(video_id)
5959
quote_match["subs"] = bold_query_matches(quote["text"].strip(), query)
6060
quote_match["time_stamp"] = time_stamp
6161
quote_match["video_id"] = video_id
@@ -94,17 +94,19 @@ def print_fts_res(res, query):
9494
fts_dict = {}
9595
for quote in fts_res:
9696
channel_name = quote["channel_name"]
97-
video_name = quote["video_title"]
97+
metadata = quote["metadata"]
98+
video_name = metadata["video_title"]
99+
video_date = metadata["video_date"]
98100
quote_data = {
99101
"quote": quote["subs"],
100102
"time_stamp": quote["time_stamp"],
101103
"link": quote["link"]
102104
}
103105
if channel_name not in fts_dict:
104106
fts_dict[channel_name] = {}
105-
if video_name not in fts_dict[channel_name]:
106-
fts_dict[channel_name][video_name] = []
107-
fts_dict[channel_name][video_name].append(quote_data)
107+
if (video_name, video_date) not in fts_dict[channel_name]:
108+
fts_dict[channel_name][(video_name, video_date)] = []
109+
fts_dict[channel_name][(video_name, video_date)].append(quote_data)
108110

109111

110112
# Sort the list by the total number of quotes in each channel
@@ -119,8 +121,8 @@ def print_fts_res(res, query):
119121
video_list = list(videos.items())
120122
video_list.sort(key=lambda x: len(x[1]))
121123

122-
for video_name, quotes in video_list:
123-
console.print(f" [bold][blue]{video_name}[/blue][/bold]")
124+
for (video_name, video_date), quotes in video_list:
125+
console.print(f" [bold][blue]{video_name}[/blue][/bold] ({video_date})")
124126
console.print("")
125127

126128
# Sort the quotes by timestamp

Diff for: yt_fts/utils.py

+8
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
"""
22
This is where I'm putting all the functions that don't belong anywhere else
33
"""
4+
import datetime
45
import re
56
import sqlite3
67

@@ -91,6 +92,13 @@ def get_time_delta(timestamp1, timestamp2):
9192
return diff
9293

9394

95+
def get_date(date_string):
96+
# Python 3.11 would support datetime.date.fromisoformat('YYYYMMDD') directly
97+
if '-' in date_string:
98+
return datetime.date.fromisoformat(date_string)
99+
return datetime.datetime.strptime(date_string, '%Y%m%d').date()
100+
101+
94102
# check if semantic search has been enabled for channel
95103
def check_ss_enabled(channel_id=None):
96104

0 commit comments

Comments
 (0)