Skip to content

Commit 098eb9c

Browse files
authored
Merge pull request #17 from pct-org/improvements
refactor: Change from pMap to pTimes
2 parents a881b2a + 5f22e31 commit 098eb9c

File tree

5 files changed

+92
-75
lines changed

5 files changed

+92
-75
lines changed

apps/scraper/src/scraper.module.ts

+7-9
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { Inject, Logger, Module, OnApplicationBootstrap } from '@nestjs/common'
22
import { ScheduleModule, SchedulerRegistry } from '@nestjs/schedule'
33
import { MongooseModule } from '@nestjs/mongoose'
44
import { CronJob } from 'cron'
5-
import * as pMap from 'p-map'
5+
import * as pLimit from 'p-limit'
66

77
import { ModelsModule } from './shared/models.module'
88
import { ConfigModule } from './shared/config/config.module'
@@ -60,21 +60,19 @@ export class ScraperModule implements OnApplicationBootstrap {
6060

6161
this.logger.log(`Enabled cron on '${this.configService.get(ConfigService.CRON_TIME)}'`)
6262

63-
if (this.configService.get(ConfigService.SCRAPE_ON_START)) {
63+
if (this.configService.get(ConfigService.SCRAPE_ON_START) || true) {
6464
this.scrapeConfigs()
6565
}
6666
}
6767

6868
private async scrapeConfigs(): Promise<void> {
6969
this.logger.log('Start scraping')
7070

71-
await pMap(
72-
this.providersService.getProviders(),
73-
(provider) => provider.scrapeConfigs(),
74-
{
75-
concurrency: 1
76-
}
77-
)
71+
const limit = pLimit(1)
72+
73+
await Promise.all(this.providersService.getProviders().map((provider) => (
74+
limit(() => provider.scrapeConfigs())
75+
)))
7876
}
7977

8078
}

libs/scraper/providers/base/src/base.provider.ts

+53-43
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { formatTorrents } from '@pct-org/torrent/utils'
77
import { BaseHelper } from '@pct-org/scraper/helpers/base'
88
import { MOVIE_TYPE } from '@pct-org/types/movie'
99
import { SHOW_TYPE } from '@pct-org/types/show'
10+
import * as pLimit from 'p-limit'
1011

1112
import {
1213
ScrapedItem,
@@ -62,24 +63,27 @@ export abstract class BaseProvider {
6263
/**
6364
* Starts scraping the provided configs
6465
*/
65-
async scrapeConfigs(): Promise<void> {
66+
public async scrapeConfigs(): Promise<void> {
6667
this.logger.log(`Started scraping...`)
6768

68-
await pMap(
69-
this.configs,
70-
(config) => this.scrapeConfig(config),
71-
{
72-
concurrency: 1
73-
}
74-
)
69+
const limit = pLimit(1)
70+
71+
await Promise.all(this.configs.map((config) => (
72+
limit(() => this.scrapeConfig(config))
73+
)))
7574

7675
this.logger.log(`Done scraping`)
7776
}
7877

7978
/**
8079
* Set the configuration to scrape with.
8180
*/
82-
protected setConfig({ query, contentType, regexps = [], language = 'en' }: ScraperProviderConfig): void {
81+
protected setConfig({
82+
query,
83+
contentType,
84+
regexps = [],
85+
language = 'en'
86+
}: ScraperProviderConfig): void {
8387
this.contentType = contentType
8488
this.query = query
8589
this.language = language
@@ -105,36 +109,32 @@ export abstract class BaseProvider {
105109
this.logger.log(`Total pages ${totalPages}`)
106110

107111
const torrents = await this.getAllTorrents(totalPages)
108-
const allContent = await this.getAllContent(torrents)
112+
const contents = await this.getAllContent(torrents)
109113

110-
this.logger.log(`Total content ${allContent.length}`)
114+
this.logger.log(`Total content ${contents.length}`)
111115

112-
await pMap(
113-
allContent,
114-
async (content) => {
115-
const isInBlacklist = await this.isItemBlackListed(content)
116+
const limit = pLimit(this.maxWebRequests)
116117

117-
// Only get data for this item if it's not in the blacklist
118-
if (!isInBlacklist) {
119-
try {
120-
await this.enhanceAndImport(content)
118+
await Promise.all(contents.map((content) => limit(async() => {
119+
const isInBlacklist = await this.isItemBlackListed(content)
121120

122-
} catch (err) {
123-
const errorMessage = err.message || err
121+
// Only get data for this item if it's not in the blacklist
122+
if (!isInBlacklist) {
123+
try {
124+
await this.enhanceAndImport(content)
124125

125-
this.logger.error(`BaseProvider.scrapeConfig: ${errorMessage}`, err.stack)
126+
} catch (err) {
127+
const errorMessage = err.message || err
126128

127-
// Log the content so it can be better debugged from logs
128-
if (errorMessage.includes('Could not find any data with slug')) {
129-
this.logger.error(JSON.stringify(content))
130-
}
129+
this.logger.error(`BaseProvider.scrapeConfig: ${errorMessage}`, err.stack)
130+
131+
// Log the content so it can be better debugged from logs
132+
if (errorMessage.includes('Could not find any data with slug')) {
133+
this.logger.error(JSON.stringify(content))
131134
}
132135
}
133-
},
134-
{
135-
concurrency: this.maxWebRequests
136136
}
137-
)
137+
})))
138138
} catch (err) {
139139
this.logger.error(`Catch BaseProvider.scrapeConfig: ${err.message || err}`, err.stack)
140140
}
@@ -146,7 +146,10 @@ export abstract class BaseProvider {
146146
* @returns {Promise<Boolean|Error>}
147147
*/
148148
protected async isItemBlackListed(content: ScrapedItem): Promise<boolean | Error> {
149-
const { slug, imdb } = content
149+
const {
150+
slug,
151+
imdb
152+
} = content
150153

151154
const blacklistedItem = await this.blackListModel.findOne({
152155
$or: [
@@ -253,7 +256,11 @@ export abstract class BaseProvider {
253256
/**
254257
* Extract content information based on a regex.
255258
*/
256-
abstract extractContent({ torrent, regex, lang }): ScrapedItem | undefined
259+
abstract extractContent({
260+
torrent,
261+
regex,
262+
lang
263+
}): ScrapedItem | undefined
257264

258265
/**
259266
* Get content info from a given torrent.
@@ -307,7 +314,8 @@ export abstract class BaseProvider {
307314
return items.set(slug, item)
308315
}, {
309316
concurrency: 1
310-
}).then(() => Array.from(items.values()))
317+
})
318+
.then(() => Array.from(items.values()))
311319
}
312320

313321
/**
@@ -352,7 +360,7 @@ export abstract class BaseProvider {
352360
getAllTorrents(totalPages: number): Promise<Array<any>> {
353361
let torrents = []
354362

355-
return pTimes(totalPages, async (page) => {
363+
return pTimes(totalPages, async(page) => {
356364
this.logger.debug(`Started searching ${this.name} on page ${page + 1} out of ${totalPages}`)
357365

358366
// Get the page
@@ -364,17 +372,19 @@ export abstract class BaseProvider {
364372
}, {
365373
concurrency: 1,
366374
stopOnError: false
367-
}).then(() => {
368-
this.logger.log(`Found ${torrents.length} torrents.`)
375+
})
376+
.then(() => {
377+
this.logger.log(`Found ${torrents.length} torrents.`)
369378

370-
return Promise.resolve(torrents)
371-
}).catch((err) => {
372-
// Only log the errors
373-
this.logger.error(`Catch BaseProvider.getAllTorrents: ${err.message || err}`)
379+
return Promise.resolve(torrents)
380+
})
381+
.catch((err) => {
382+
// Only log the errors
383+
this.logger.error(`Catch BaseProvider.getAllTorrents: ${err.message || err}`)
374384

375-
// We still want to resolve all the pages that did go well
376-
return Promise.resolve(torrents)
377-
})
385+
// We still want to resolve all the pages that did go well
386+
return Promise.resolve(torrents)
387+
})
378388
}
379389

380390
/**

libs/scraper/providers/eztv/src/eztv-provider.service.ts

+19-23
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { Inject, Injectable, Logger } from '@nestjs/common'
22
import { BaseProvider, ScrapedItem, ScraperProviderConfig } from '@pct-org/scraper/providers/base'
33
import { EztvApi } from '@pct-org/eztv-api'
4-
import * as pMap from 'p-map'
4+
import * as pLimit from 'p-limit'
55
import { ShowHelperService } from '@pct-org/scraper/helpers/show'
66
import { SHOW_TYPE } from '@pct-org/types/show'
77

@@ -36,36 +36,32 @@ export class EztvProviderService extends BaseProvider {
3636

3737
this.logger.log(`${this.name}: Found ${contents.length} ${this.contentType}s.`)
3838

39-
await pMap(
40-
contents,
41-
async (content) => {
42-
const isInBlacklist = await this.isItemBlackListed(content)
39+
const limit = pLimit(this.maxWebRequests)
4340

44-
// Only get data for this item if it's not in the blacklist
45-
if (!isInBlacklist) {
46-
try {
47-
// Get full show data
48-
const show: ScrapedItem = await this.api.getShowData(content)
41+
await Promise.all(contents.map((content) => limit(async() => {
42+
const isInBlacklist = await this.isItemBlackListed(content)
4943

50-
// Enhance and import the show
51-
await this.enhanceAndImport(show)
44+
// Only get data for this item if it's not in the blacklist
45+
if (!isInBlacklist) {
46+
try {
47+
// Get full show data
48+
const show: ScrapedItem = await this.api.getShowData(content)
5249

53-
} catch (err) {
54-
const errorMessage = err.message || err
50+
// Enhance and import the show
51+
await this.enhanceAndImport(show)
5552

56-
this.logger.error(`EztvProviderService.scrapeConfig: ${errorMessage}`, err.stack)
53+
} catch (err) {
54+
const errorMessage = err.message || err
5755

58-
// Log the content so it can be better debugged from logs
59-
if (errorMessage.includes('Could not find any data with slug')) {
60-
this.logger.error(JSON.stringify(content))
61-
}
56+
this.logger.error(`EztvProviderService.scrapeConfig: ${errorMessage}`, err.stack)
57+
58+
// Log the content so it can be better debugged from logs
59+
if (errorMessage.includes('Could not find any data with slug')) {
60+
this.logger.error(JSON.stringify(content))
6261
}
6362
}
64-
},
65-
{
66-
concurrency: this.maxWebRequests
6763
}
68-
)
64+
})))
6965
}
7066

7167
extractContent({ torrent, regex, lang }: { torrent: any; regex: any; lang: any }): undefined {

package.json

+1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
"node-tvdb": "^4.1.0",
7373
"omdb-api-pt": "^2.0.1",
7474
"opensubtitles-api": "^5.1.2",
75+
"p-limit": "^3.1.0",
7576
"p-map": "^4.0.0",
7677
"p-times": "^3.0.0",
7778
"pm2": "^5.1.1",

yarn.lock

+12
Original file line numberDiff line numberDiff line change
@@ -9767,6 +9767,13 @@ p-limit@^3.0.1, p-limit@^3.0.2:
97679767
dependencies:
97689768
p-try "^2.0.0"
97699769

9770+
p-limit@^3.1.0:
9771+
version "3.1.0"
9772+
resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-3.1.0.tgz#e1daccbe78d0d1388ca18c64fea38e3e57e3706b"
9773+
integrity sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==
9774+
dependencies:
9775+
yocto-queue "^0.1.0"
9776+
97709777
p-locate@^3.0.0:
97719778
version "3.0.0"
97729779
resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-3.0.0.tgz#322d69a05c0264b25997d9f40cd8a891ab0064a4"
@@ -13792,6 +13799,11 @@ [email protected]:
1379213799
resolved "https://registry.yarnpkg.com/yn/-/yn-3.1.1.tgz#1e87401a09d767c1d5eab26a6e4c185182d2eb50"
1379313800
integrity sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==
1379413801

13802+
yocto-queue@^0.1.0:
13803+
version "0.1.0"
13804+
resolved "https://registry.yarnpkg.com/yocto-queue/-/yocto-queue-0.1.0.tgz#0294eb3dee05028d31ee1a5fa2c556a6aaf10a1b"
13805+
integrity sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==
13806+
1379513807
yts-api-pt@^2.0.0:
1379613808
version "2.0.0"
1379713809
resolved "https://registry.yarnpkg.com/yts-api-pt/-/yts-api-pt-2.0.0.tgz#f1048d258e3ab5b1e0862e9732ea59fab1ead64a"

0 commit comments

Comments (0)