Skip to content

Commit 1cca922

Browse files
committed
feature: add favicon scraper. Fixes #40
1 parent 33d0873 commit 1cca922

14 files changed

+101
-375
lines changed

lib/link_thumbnailer/configuration.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def initialize
4141
%r{^http://pixel\.quantserve\.com/},
4242
%r{^http://s7\.addthis\.com/}
4343
]
44-
@attributes = [:title, :images, :description, :videos]
44+
@attributes = [:title, :images, :description, :videos, :favicon]
4545
@graders = [
4646
->(description) { ::LinkThumbnailer::Graders::Length.new(description) },
4747
->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :class) },
+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
require 'link_thumbnailer/model'
2+
3+
module LinkThumbnailer
  module Models
    # Model holding the URI of a page's favicon.
    class Favicon < ::LinkThumbnailer::Model
      # The object handed to the constructor, returned untouched.
      attr_reader :uri

      # @param uri [Object] stored as-is; this class only ever calls #to_s on
      #   it, so nil is acceptable and renders as an empty string
      def initialize(uri)
        @uri = uri
      end

      # @return [String] the string form of the wrapped URI
      def to_s
        uri.to_s
      end

      # Any arguments are accepted and ignored.
      #
      # @return [Hash] a single-key hash, +:src+ mapped to the result of #to_s
      def as_json(*)
        { src: to_s }
      end
    end
  end
end

lib/link_thumbnailer/models/website.rb

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ module LinkThumbnailer
44
module Models
55
class Website < ::LinkThumbnailer::Model
66

7-
attr_accessor :url, :title, :description, :images, :videos
7+
attr_accessor :url, :title, :description, :images, :videos, :favicon
88

99
def initialize
1010
@images = []
@@ -39,6 +39,7 @@ def images
3939
def as_json(*)
4040
{
4141
url: url.to_s,
42+
favicon: favicon,
4243
title: title,
4344
description: description,
4445
images: images,

lib/link_thumbnailer/scraper.rb

+2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
require 'link_thumbnailer/scrapers/opengraph/images'
1313
require 'link_thumbnailer/scrapers/default/videos'
1414
require 'link_thumbnailer/scrapers/opengraph/videos'
15+
require 'link_thumbnailer/scrapers/default/favicon'
16+
require 'link_thumbnailer/scrapers/opengraph/favicon'
1517

1618
module LinkThumbnailer
1719
class Scraper < ::SimpleDelegator
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
require 'link_thumbnailer/scrapers/default/base'
2+
require 'link_thumbnailer/models/favicon'
3+
4+
module LinkThumbnailer
  module Scrapers
    module Default
      # Scrapes the favicon URL from the first <link> element whose +rel+
      # attribute contains the substring "icon" (for example rel="icon" or
      # rel="shortcut icon").
      class Favicon < ::LinkThumbnailer::Scrapers::Default::Base

        # @return [String] the favicon URL, or an empty string when no
        #   matching <link> exists, the element has no href attribute, or the
        #   href cannot be parsed as a URI
        def value
          modelize(to_uri(href)).to_s
        end

        private

        # Parses +href+ into a URI.
        #
        # @param href [String, nil] raw attribute value
        # @return [URI::Generic, nil] nil when +href+ is nil or is not a valid URI
        def to_uri(href)
          # Explicit guard rather than relying on URI.parse(nil) raising.
          return nil if href.nil?

          ::URI.parse(href)
        rescue ::URI::InvalidURIError
          nil
        end

        # @return [String, nil] the href of the favicon <link>, with
        #   surrounding whitespace removed; nil when there is no such element
        #   or it carries no href attribute
        def href
          link = node # evaluate the XPath query only once
          return nil unless link

          attribute = link.attributes['href']
          # A <link rel="icon"> without href used to raise NoMethodError here.
          return nil unless attribute

          # Leading/trailing whitespace would make URI.parse reject an
          # otherwise valid URL, so trim it first.
          attribute.value.to_s.strip
        end

        # @return [Object, nil] first element matching the XPath query, or nil
        #   NOTE(review): +document+ comes from the base class and is
        #   presumably a Nokogiri document - confirm.
        def node
          document.xpath("//link[contains(@rel, 'icon')]").first
        end

        # Wraps +uri+ in the model class supplied by the base class.
        def modelize(uri)
          model_class.new(uri)
        end

      end
    end
  end
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
require 'link_thumbnailer/scrapers/opengraph/base'
2+
3+
module LinkThumbnailer
  module Scrapers
    module Opengraph
      # Opengraph counterpart of the favicon scraper. It inspects nothing and
      # never produces a value.
      # NOTE(review): presumably this exists so the favicon attribute has an
      # Opengraph scraper class to resolve to, with the default scraper doing
      # the real work - confirm against LinkThumbnailer::Scraper.
      class Favicon < ::LinkThumbnailer::Scrapers::Opengraph::Base
        # @return [nil] always; the empty body evaluates to nil
        def value; end
      end
    end
  end
end

spec/configuration_spec.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
it { expect(instance.verify_ssl).to eq(true) }
1010
it { expect(instance.http_timeout).to eq(5) }
1111
it { expect(instance.blacklist_urls).to_not be_empty }
12-
it { expect(instance.attributes).to eq([:title, :images, :description, :videos]) }
12+
it { expect(instance.attributes).to eq([:title, :images, :description, :videos, :favicon]) }
1313
it { expect(instance.graders).to_not be_empty }
1414
it { expect(instance.description_min_length).to eq(25) }
1515
it { expect(instance.positive_regex).to_not be_nil }

spec/examples/empty_og_image_example.html

-9
This file was deleted.

spec/fixture_spec.rb

+6
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
let(:url) { 'http://foo.com' }
66
let(:png_url) { 'http://foo.com/foo.png' }
77
let(:video_url) { 'http://foo.com/foo.swf' }
8+
let(:favicon) { 'http://foo.com/foo.ico' }
89
let(:png) { File.open(File.dirname(__FILE__) + '/fixtures/foo.png') }
910
let(:action) { LinkThumbnailer.generate(url) }
1011

@@ -22,6 +23,7 @@
2223

2324
let(:html) { File.open(File.dirname(__FILE__) + '/fixtures/og_valid_example.html').read() }
2425

26+
it { expect(action.favicon).to eq(favicon) }
2527
it { expect(action.title).to eq(title) }
2628
it { expect(action.description).to eq(description) }
2729
it { expect(action.images.count).to eq(1) }
@@ -42,6 +44,7 @@
4244
stub_request(:get, png_url_2).to_return(status: 200, body: png_2, headers: {})
4345
end
4446

47+
it { expect(action.favicon).to eq('') }
4548
it { expect(action.title).to eq(title) }
4649
it { expect(action.description).to eq(description) }
4750
it { expect(action.images.count).to eq(2) }
@@ -55,6 +58,7 @@
5558
let(:video_url_2) { 'http://foo.com/bar.swf' }
5659
let(:html) { File.open(File.dirname(__FILE__) + '/fixtures/og_valid_multi_video_example.html').read() }
5760

61+
it { expect(action.favicon).to eq('') }
5862
it { expect(action.title).to eq(title) }
5963
it { expect(action.description).to eq(description) }
6064
it { expect(action.videos.count).to eq(2) }
@@ -84,6 +88,7 @@
8488
let(:title) { 'Title from meta' }
8589
let(:description) { 'Description from meta' }
8690

91+
it { expect(action.favicon).to eq(favicon) }
8792
it { expect(action.title).to eq(title) }
8893
it { expect(action.description).to eq(description) }
8994

@@ -94,6 +99,7 @@
9499
let(:html) { File.open(File.dirname(__FILE__) + '/fixtures/default_from_body.html').read() }
95100
let(:description) { 'Description from body' }
96101

102+
it { expect(action.favicon).to eq(favicon) }
97103
it { expect(action.description).to eq(description) }
98104
it { expect(action.images.count).to eq(1) }
99105
it { expect(action.images.first.src.to_s).to eq(png_url) }

spec/fixtures/default_from_body.html

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
<html>
22
<head>
33
<title>Title from meta</title>
4+
<link rel="shortcut icon" href="http://foo.com/foo.ico">
45
</head>
56
<body>
67

spec/fixtures/default_from_meta.html

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
<head>
33
<title>Title from meta</title>
44
<meta content="Description from meta" name="description">
5+
<link rel="icon whatever" href="http://foo.com/foo.ico">
56
</head>
67
<body>
78

0 commit comments

Comments
 (0)