Skip to content

Commit 99570aa

Browse files
committed
Merge pull request #13 from Swader/feature-search
Added Search API
2 parents 431309d + a42f8c5 commit 99570aa

14 files changed

+694
-36
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,6 @@ build
3333
composer.lock
3434
docs
3535
vendor
36+
public
3637

3738
index.php

CHANGELOG.md

+5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
#Changelog
22
All notable changes will be documented in this file
33

4+
## 0.4 - June 11th, 2015
5+
6+
- [Feature] Added Search API
7+
- [Feature] Added SearchInfo: in addition to the Entities returned in a regular EntityIterator as usual, the Search API returns a SearchInfo object, too. See README.
8+
49
## 0.3 - May 17th, 2015
510

611
### Internal changes

README.md

+16
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,22 @@ $url = $crawl->buildUrl();
312312
$url->call();
313313
```
314314

315+
## Search API
316+
317+
The Search API is used to quickly search across data obtained through the Bulk or Crawl API.
318+
319+
```php
320+
$diffbot = new Diffbot('my_token');
321+
$search = $diffbot->search('author:"Miles Johnson" AND type:article')->call();
322+
323+
324+
foreach ($search as $article) {
325+
echo $article->getTitle();
326+
}
327+
```
328+
329+
Use the Search API's `setCol` method to target a specific collection only; otherwise, all of your token's collections are searched.
330+
315331
## Testing
316332

317333
Just run PHPUnit in the root folder of the cloned project.

src/Abstracts/Api.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ public function buildUrl()
9797
{
9898
$url = rtrim($this->apiUrl, '/').'?';
9999

100-
if (strcmp($url,'crawl') !== 0) {
100+
if (strcmp($this->url,'crawl') !== 0) {
101101
// Add Token
102102
$url .= 'token=' . $this->diffbot->getToken();
103103

src/Api/Search.php

+157
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
<?php
2+
3+
namespace Swader\Diffbot\Api;
4+
5+
use Swader\Diffbot\Abstracts\Api;
6+
use Swader\Diffbot\Entity\SearchInfo;
7+
use Swader\Diffbot\Traits\DiffbotAware;
8+
9+
/**
 * Class Search
 *
 * Client for the Diffbot Search API. Builds the request URL from a search
 * query plus optional `col`, `num` and `start` parameters, executes it via
 * the parent Api, and keeps a SearchInfo object describing the last call.
 *
 * @see https://www.diffbot.com/dev/docs/search/
 * @package Swader\Diffbot\Api
 */
class Search extends Api
{
    use DiffbotAware;

    /** @var string API URL to which to send the request */
    protected $apiUrl = 'https://api.diffbot.com/v3/search';

    /**
     * @var string|null Not read or written by this class; setCol() stores
     *                  the collection name in $otherOptions['col'] instead.
     */
    protected $col = null;

    /** @var string Search query to execute */
    protected $query = '';

    /** @var SearchInfo|null Info about the last executed call, if any */
    protected $info;

    /** Value accepted by setNum() to request every matching result */
    const SEARCH_ALL = 'all';

    /**
     * Search query.
     * @see https://www.diffbot.com/dev/docs/search/#query
     * @param string $q
     */
    public function __construct($q)
    {
        $this->query = $q;
    }

    /**
     * Name of the collection (Crawlbot or Bulk API job name) to search.
     * By default the search will operate on all of your token's collections.
     * Passing null removes a previously set collection.
     *
     * @param null|string $col
     * @return $this
     */
    public function setCol($col = null)
    {
        if ($col !== null) {
            $this->otherOptions['col'] = $col;
        } else {
            unset($this->otherOptions['col']);
        }

        return $this;
    }

    /**
     * Number of results to return. Default is 20. To return all results in
     * the search, pass num=all (self::SEARCH_ALL).
     *
     * @param int|string $num
     * @return $this
     * @throws \InvalidArgumentException if $num is neither numeric nor "all"
     */
    public function setNum($num = 20)
    {
        if (!is_numeric($num) && $num !== self::SEARCH_ALL) {
            throw new \InvalidArgumentException(
                'Argument can only be numeric or "all" to return all results.'
            );
        }
        $this->otherOptions['num'] = $num;

        return $this;
    }

    /**
     * Ordinal position of first result to return. (First position is 0.)
     * Default is 0.
     *
     * @param int $start
     * @return $this
     * @throws \InvalidArgumentException if $start is not numeric
     */
    public function setStart($start = 0)
    {
        if (!is_numeric($start)) {
            throw new \InvalidArgumentException(
                'Argument can only be numeric.'
            );
        }
        $this->otherOptions['start'] = $start;

        return $this;
    }

    /**
     * Builds out the URL string that gets requested once `call()` is called.
     *
     * Every value placed in the query string is URL-encoded, so collection
     * names (or any other option) containing characters such as `&`, `#` or
     * spaces cannot break or alter the request.
     *
     * @return string
     */
    public function buildUrl()
    {
        $url = rtrim($this->apiUrl, '/') . '?';

        // Add token
        $url .= 'token=' . urlencode((string)$this->diffbot->getToken());

        // Add query
        $url .= '&query=' . urlencode($this->query);

        // Add other options
        foreach ($this->otherOptions as $option => $value) {
            $url .= '&' . urlencode((string)$option)
                . '=' . urlencode((string)$value);
        }

        return $url;
    }

    /**
     * If you pass in `true`, you get back a SearchInfo object related to the
     * last call. Keep in mind that passing in true before calling a default
     * call() will implicitly call the call(), and then get the SearchInfo.
     *
     * So:
     *
     * $searchApi->call() // gets entities
     * $searchApi->call(true) // gets SearchInfo about the executed query
     *
     * @todo: remove error avoidance when issue 12 is fixed: https://github.com/Swader/diffbot-php-client/issues/12
     * @param bool $info
     * @return \Swader\Diffbot\Entity\EntityIterator|SearchInfo
     */
    public function call($info = false)
    {
        if ($info) {
            // Lazily execute the search once if no info has been captured yet.
            if (!$this->info) {
                $this->call();
            }

            return $this->info;
        }

        $ei = parent::call();

        // Temporarily silence PHP errors raised while decoding (see @todo).
        set_error_handler(function () { /* ignore errors */ });
        try {
            $arr = $ei->getResponse()->json(['big_int_strings' => true]);
        } catch (\Exception $e) {
            // Never leave the silencing handler installed when decoding
            // fails, otherwise later errors in the process stay suppressed.
            // (catch + rethrow rather than `finally`, which needs PHP 5.5.)
            restore_error_handler();
            throw $e;
        }
        restore_error_handler();

        // Everything except the request echo and the result objects is
        // handed to SearchInfo.
        unset($arr['request'], $arr['objects']);

        $this->info = new SearchInfo($arr);

        return $ei;
    }
}

src/Diffbot.php

+16
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
use Swader\Diffbot\Api\Crawl;
66
use Swader\Diffbot\Api\Custom;
7+
use Swader\Diffbot\Api\Search;
78
use Swader\Diffbot\Exceptions\DiffbotException;
89
use Swader\Diffbot\Api\Product;
910
use Swader\Diffbot\Api\Image;
@@ -252,4 +253,19 @@ public function crawl($name = null, Api $api = null)
252253
return $api->registerDiffbot($this);
253254
}
254255

256+
/**
 * Creates a Search API instance for the given query and registers this
 * Diffbot instance with it.
 *
 * When no HTTP client has been configured yet, setHttpClient() and
 * setEntityFactory() are invoked without arguments before the API object
 * is handed back.
 *
 * @see https://www.diffbot.com/dev/docs/search/#query
 * @param string $q Search query
 * @return Search
 */
public function search($q)
{
    $hasClient = (bool)$this->getHttpClient();

    if ($hasClient === false) {
        $this->setHttpClient();
        $this->setEntityFactory();
    }

    $searchApi = new Search($q);

    return $searchApi->registerDiffbot($this);
}
255271
}

src/Entity/SearchInfo.php

+130
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
<?php
2+
3+
namespace Swader\Diffbot\Entity;
4+
5+
use Swader\Diffbot\Abstracts\Entity;
6+
7+
/**
 * Class SearchInfo
 *
 * Typed accessors over the data describing an executed Search API query
 * (timings, hit counts, shard counts, query breakdown).
 *
 * Every accessor tolerates a missing field: integer getters return 0 and
 * getQueryInfo() returns an empty array instead of raising an
 * undefined-index notice.
 *
 * NOTE(review): relies on $this->data being the array-accessible payload
 * stored by the parent Entity - confirm against Entity.
 */
class SearchInfo extends Entity
{

    /**
     * Should always return "searchInfo".
     *
     * A 'searchInfo' value present in the data is still honoured for
     * backwards compatibility; when it is absent the documented literal is
     * returned rather than null.
     *
     * @return string
     */
    public function getType()
    {
        return isset($this->data['searchInfo'])
            ? $this->data['searchInfo']
            : 'searchInfo';
    }

    /**
     * Current UTC time as timestamp
     * @return int
     */
    public function getCurrentTimeUTC()
    {
        return $this->intOrZero('currentTimeUTC');
    }

    /**
     * Response time in milliseconds. Time it took to process the query on
     * Diffbot's end.
     * @return int
     */
    public function getResponseTimeMS()
    {
        return $this->intOrZero('responseTimeMS');
    }

    /**
     * Number of results skipped for any reason
     * @todo: find out why results might be omitted
     * @return int
     */
    public function getNumResultsOmitted()
    {
        return $this->intOrZero('numResultsOmitted');
    }

    /**
     * Number of skipped shards
     * @todo: find out what shards are
     * @return int
     */
    public function getNumShardsSkipped()
    {
        return $this->intOrZero('numShardsSkipped');
    }

    /**
     * Total number of shards
     * @todo: find out what shards are
     * @return int
     */
    public function getTotalShards()
    {
        return $this->intOrZero('totalShards');
    }

    /**
     * Total number of documents in collection.
     * Should resemble the total number you got on the crawl job.
     * @todo: find out why not identical
     * @return int
     */
    public function getDocsInCollection()
    {
        return $this->intOrZero('docsInCollection');
    }

    /**
     * Number of results that match - NOT the number of *returned* results!
     * @return int
     */
    public function getHits()
    {
        return $this->intOrZero('hits');
    }

    /**
     * Returns an assoc. array containing the following keys and example values:
     *
     *     "fullQuery" => "type:json AND (author:\"Miles Johnson\" AND type:article)",
     *     "queryLanguageAbbr" => "xx",
     *     "queryLanguage" => "Unknown",
     *     "terms" => [
     *         [
     *             "termNum" => 0,
     *             "termStr" => "Miles Johnson",
     *             "termFreq" => 2621376,
     *             "termHash48" => 224575481707228,
     *             "termHash64" => 4150001371756911641,
     *             "prefixHash64" => 3732660069076179349
     *         ],
     *         [
     *             "termNum" => 1,
     *             "termStr" => "type:json",
     *             "termFreq" => 2621664,
     *             "termHash48" => 272064464231140,
     *             "termHash64" => 9877301297136722857,
     *             "prefixHash64" => 7586288672657224048
     *         ],
     *         [
     *             "termNum" => 2,
     *             "termStr" => "type:article",
     *             "termFreq" => 524448,
     *             "termHash48" => 210861560163398,
     *             "termHash64" => 12449358332005671483,
     *             "prefixHash64" => 7586288672657224048
     *         ]
     *     ]
     *
     * An empty array is returned when the data holds no query info.
     *
     * @todo: find out what hashes are, and to what the freq is relative
     * @return array
     */
    public function getQueryInfo()
    {
        return isset($this->data['queryInfo'])
            ? (array)$this->data['queryInfo']
            : [];
    }

    /**
     * Reads a field of the data as an integer.
     *
     * @param string $key Name of the field to read
     * @return int The field cast to int, or 0 when it is missing or null
     */
    private function intOrZero($key)
    {
        return isset($this->data[$key]) ? (int)$this->data[$key] : 0;
    }

}

0 commit comments

Comments
 (0)