Skip to content

Commit 81082a5

Browse files
committed
Enable specifying explicit list of external posts to display
1 parent 7e8ca8f commit 81082a5

File tree

1 file changed

+77
-18
lines changed

1 file changed

+77
-18
lines changed

_plugins/external-posts.rb

+77-18
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
require 'feedjira'
22
require 'httparty'
33
require 'jekyll'
4+
require 'nokogiri'
5+
require 'time'
46

57
module ExternalPosts
68
class ExternalPostsGenerator < Jekyll::Generator
@@ -10,27 +12,84 @@ class ExternalPostsGenerator < Jekyll::Generator
1012
# Generator hook: walks the `external_sources` entries of the site config and
# imports posts for each one.
#
# A source with an `rss_url` key is read as a feed; otherwise a source with a
# `posts` key is read as an explicit list of URLs. A source with neither key
# is only announced on stdout and then skipped.
#
# @param site [Object] the site being generated; must respond to `config`
# @return [Array, nil] the configured sources, or nil when none are configured
def generate(site)
  sources = site.config['external_sources']
  return if sources.nil?

  sources.each do |src|
    puts "Fetching external posts from #{src['name']}:"
    # The feed takes precedence when both keys are present.
    next fetch_from_rss(site, src) if src['rss_url']

    fetch_from_urls(site, src) if src['posts']
  end
end
34-
end
3524

25+
# Downloads the feed at `src['rss_url']`, parses it with Feedjira and hands
# the parsed entries to `process_entries`.
#
# @param site [Object] the site the resulting documents are added to
# @param src [Hash] source configuration; reads the 'rss_url' key
# @return [Object] whatever `process_entries` returns
def fetch_from_rss(site, src)
  response = HTTParty.get(src['rss_url'])
  parsed_feed = Feedjira.parse(response.body)
  process_entries(site, src, parsed_feed.entries)
end
30+
31+
# Creates one post document per feed entry.
#
# @param site [Object] the site the documents are added to
# @param src [Hash] source configuration; its 'name' labels each document
# @param entries [Enumerable] entries responding to url, title, content,
#   summary and published
# @return [Enumerable] the `entries` collection that was passed in
def process_entries(site, src, entries)
  entries.each do |entry|
    link = entry.url
    puts "...fetching #{link}"

    fields = {
      title: entry.title,
      content: entry.content,
      summary: entry.summary,
      published: entry.published
    }
    create_document(site, src['name'], link, fields)
  end
end
42+
43+
# Builds a post document for one external post and appends it to the site's
# `posts` collection.
#
# The document path is `_posts/<slug>.md`, where the slug is derived from the
# title (lower-cased, spaces turned into dashes, other non-word characters
# dropped). When the title is missing or yields an empty slug, the slug is
# derived from the URL instead.
#
# @param site [Object] must respond to `in_source_dir` and `collections`
# @param source_name [String] stored as the document's `external_source`
# @param url [String] stored as the document's `redirect`
# @param content [Hash] reads the :title, :content, :summary and :published keys
# @return [Array] the docs array of the `posts` collection after appending
def create_document(site, source_name, url, content)
  # Entries or pages may come without a title; coerce to a string so slug
  # generation cannot raise NoMethodError on nil.
  title = content[:title].to_s
  slug = title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
  # Without this fallback every untitled post would get the path "_posts/.md".
  slug = url.to_s.downcase.gsub(/[^\w-]+/, '-').gsub(/\A-+|-+\z/, '') if slug.empty?

  path = site.in_source_dir("_posts/#{slug}.md")
  doc = Jekyll::Document.new(
    path, { :site => site, :collection => site.collections['posts'] }
  )
  doc.data['external_source'] = source_name
  doc.data['title'] = title
  doc.data['feed_content'] = content[:content]
  doc.data['description'] = content[:summary]
  doc.data['date'] = content[:published]
  doc.data['redirect'] = url
  site.collections['posts'].docs << doc
end
57+
58+
# Creates one post document per entry of the explicit `src['posts']` list.
#
# Title, body and description are scraped from each post's page, while the
# publication date is taken from the post's 'published_date' config value.
#
# @param site [Object] the site the documents are added to
# @param src [Hash] source configuration; reads the 'posts' and 'name' keys
# @return [Enumerable] the `src['posts']` list
def fetch_from_urls(site, src)
  src['posts'].each do |post|
    post_url = post['url']
    puts "...fetching #{post_url}"

    scraped = fetch_content_from_url(post_url)
    published = parse_published_date(post['published_date'])
    create_document(site, src['name'], post_url, scraped.merge(published: published))
  end
end
66+
67+
# Normalizes a configured publication date to a Time in UTC.
#
# @param published_date [String, Time, Date] the configured value
# @return [Time] the corresponding instant, expressed in UTC
# @raise [RuntimeError] when the value is of any other type (including nil)
def parse_published_date(published_date)
  case published_date
  when String
    Time.parse(published_date).utc
  when Time
    # Presumably a config loader can hand over an already-parsed timestamp;
    # getutc returns a new object, so the caller's value is left untouched.
    published_date.getutc
  when Date
    published_date.to_time.utc
  else
    raise "Invalid date format for #{published_date}"
  end
end
77+
78+
# Downloads the page at `url` and extracts the pieces needed for a post.
#
# Each missing element falls back to an empty string. The publication date is
# not part of the result; the caller adds it.
#
# @param url [String] address of the page to fetch
# @return [Hash] :title (text of `head title`), :content (inner HTML of
#   `body`) and :summary (the description meta tag's `content` attribute)
def fetch_content_from_url(url)
  page = Nokogiri::HTML(HTTParty.get(url).body)

  title_node = page.at('head title')
  description_node = page.at('head meta[name="description"]')
  body_node = page.at('body')

  {
    title: title_node&.text || '',
    content: body_node&.inner_html || '',
    summary: description_node&.attr('content') || ''
  }
end
93+
94+
end
3695
end

0 commit comments

Comments
 (0)