scratch – Blame information for rev
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
87 | office | 1 | <?php |
2 | |||
3 | namespace Fusonic\OpenGraph; |
||
4 | |||
5 | use Fusonic\Linq\Linq; |
||
6 | use Fusonic\OpenGraph\Objects\ObjectBase; |
||
7 | use Fusonic\OpenGraph\Objects\Website; |
||
8 | use GuzzleHttp\Adapter\AdapterInterface; |
||
9 | use GuzzleHttp\Client; |
||
10 | use Symfony\Component\DomCrawler\Crawler; |
||
11 | |||
/**
 * Consumer that extracts Open Graph data from either a URL or a HTML string.
 */
class Consumer
{
    /**
     * Guzzle client used by loadUrl() to fetch remote documents.
     *
     * @var Client
     */
    private $client;

    /**
     * When enabled, crawler will read content of title and meta description if no
     * Open Graph data is provided by target page.
     *
     * @var bool
     */
    public $useFallbackMode = false;

    /**
     * When enabled, crawler will throw exceptions for some crawling errors like unexpected
     * Open Graph elements.
     *
     * @var bool
     */
    public $debug = false;

    /**
     * @param   AdapterInterface    $adapter        Guzzle adapter to use for making HTTP requests.
     * @param   array               $config         Optional Guzzle config overrides.
     */
    public function __construct(AdapterInterface $adapter = null, array $config = [])
    {
        // Entries given explicitly in $config take precedence over the $adapter argument.
        $config = array_replace_recursive(['adapter' => $adapter], $config);

        $this->client = new Client($config);
    }

    /**
     * Fetches HTML content from the given URL and then crawls it for Open Graph data.
     *
     * Errors raised by the Guzzle client while performing the request are not caught here.
     *
     * @param   string  $url            URL to be crawled.
     *
     * @return  Website
     */
    public function loadUrl($url)
    {
        // Fetch HTTP content using Guzzle
        $response = $this->client->get($url);

        // Cast instead of calling __toString() on the body directly: a response without a
        // body then yields an empty string rather than a fatal "member function on null".
        return $this->loadHtml((string) $response->getBody(), $url);
    }

    /**
     * Crawls the given HTML string for OpenGraph data.
     *
     * @param   string  $html           HTML string, usually whole content of crawled web resource.
     * @param   string  $fallbackUrl    URL to use when fallback mode is enabled.
     *
     * @return  ObjectBase
     */
    public function loadHtml($html, $fallbackUrl = null)
    {
        // Extract all data that can be found
        $page = $this->extractOpenGraphData($html);

        // Use the user's URL as fallback
        if ($this->useFallbackMode && $page->url === null) {
            $page->url = $fallbackUrl;
        }

        // Return result
        return $page;
    }

    /**
     * Parses the given markup and builds an Open Graph object from its meta tags.
     *
     * Meta tags whose "name" or "property" attribute starts with "og:" are collected and
     * assigned to the object. When fallback mode is enabled, a missing title is taken from
     * the <title> element and a missing description from the description meta tag.
     *
     * @param   string  $content        HTML markup to parse.
     *
     * @return  Website
     */
    private function extractOpenGraphData($content)
    {
        $crawler = new Crawler($content);

        $properties = [];
        foreach (['name', 'property'] as $t) {
            // Get all meta-tags starting with "og:"
            $ogMetaTags = $crawler->filter("meta[{$t}^='og:']");

            // Create clean property array
            $props = Linq::from($ogMetaTags)
                ->select(
                    function (\DOMElement $tag) use ($t) {
                        $name = strtolower(trim($tag->getAttribute($t)));
                        $value = trim($tag->getAttribute("content"));
                        return new Property($name, $value);
                    }
                )
                ->toArray();

            $properties = array_merge($properties, $props);
        }

        // Create new object of the correct type. Only websites are supported for now, so
        // every type (including a missing one) ends up in the default branch.
        $typeProperty = Linq::from($properties)
            ->firstOrNull(
                function (Property $property) {
                    return $property->key === Property::TYPE;
                }
            );
        switch ($typeProperty !== null ? $typeProperty->value : null) {
            default:
                $object = new Website();
                break;
        }

        // Assign all properties to the object
        $object->assignProperties($properties, $this->debug);

        // Fallback for title
        if ($this->useFallbackMode && !$object->title) {
            $titleElement = $crawler->filter("title")->first();
            // A Crawler instance is always truthy, so emptiness must be checked via count();
            // calling text() on an empty node list throws an InvalidArgumentException.
            if ($titleElement->count() > 0) {
                $object->title = trim($titleElement->text());
            }
        }

        // Fallback for description
        if ($this->useFallbackMode && !$object->description) {
            // "property" is tried first to keep the previous precedence; "name" is the
            // attribute standard HTML uses for the description meta tag.
            foreach (['property', 'name'] as $attribute) {
                $descriptionElement = $crawler->filter("meta[{$attribute}='description']")->first();
                if ($descriptionElement->count() > 0) {
                    // attr() returns null when the tag has no content attribute.
                    $object->description = trim((string) $descriptionElement->attr("content"));
                    break;
                }
            }
        }

        return $object;
    }
}