scratch – Blame information for rev

Subversion Repositories:
Rev:
Rev Author Line No. Line
87 office 1 <?php
2  
3 namespace Fusonic\OpenGraph;
4  
5 use Fusonic\Linq\Linq;
6 use Fusonic\OpenGraph\Objects\ObjectBase;
7 use Fusonic\OpenGraph\Objects\Website;
8 use GuzzleHttp\Adapter\AdapterInterface;
9 use GuzzleHttp\Client;
10 use Symfony\Component\DomCrawler\Crawler;
11  
/**
 * Consumer that extracts Open Graph data from either a URL or a HTML string.
 *
 * Typical usage: create an instance, optionally switch on $useFallbackMode and/or $debug,
 * then call loadUrl() or loadHtml().
 */
class Consumer
{
    /**
     * Guzzle client used by loadUrl() to fetch remote documents.
     *
     * @var Client
     */
    private $client;

    /**
     * When enabled, crawler will read content of title and meta description if no
     * Open Graph data is provided by target page. It also makes loadHtml() fall back
     * to the URL passed by the caller when the page declares no URL itself.
     *
     * @var bool
     */
    public $useFallbackMode = false;

    /**
     * When enabled, crawler will throw exceptions for some crawling errors like unexpected
     * Open Graph elements. The flag is handed to the object's assignProperties() call.
     *
     * @var bool
     */
    public $debug = false;

    /**
     * @param AdapterInterface $adapter Guzzle adapter to use for making HTTP requests.
     * @param array            $config  Optional Guzzle config overrides. Values given here win
     *                                  over the 'adapter' entry derived from $adapter.
     */
    public function __construct(AdapterInterface $adapter = null, array $config = [])
    {
        $config = array_replace_recursive(['adapter' => $adapter], $config);

        $this->client = new Client($config);
    }

    /**
     * Fetches HTML content from the given URL and then crawls it for Open Graph data.
     *
     * Exceptions raised by the HTTP client are not caught here.
     *
     * @param string $url URL to be crawled.
     *
     * @return Website
     */
    public function loadUrl($url)
    {
        // Fetch HTTP content using Guzzle
        $response = $this->client->get($url);

        // Cast instead of calling __toString() directly, so a response without a body
        // object yields an empty document rather than a fatal "member function on null".
        $html = (string) $response->getBody();

        return $this->loadHtml($html, $url);
    }

    /**
     * Crawls the given HTML string for OpenGraph data.
     *
     * @param string $html        HTML string, usually whole content of crawled web resource.
     * @param string $fallbackUrl URL to use when fallback mode is enabled and the page
     *                            does not provide a URL of its own.
     *
     * @return ObjectBase
     */
    public function loadHtml($html, $fallbackUrl = null)
    {
        // Extract all data that can be found
        $page = $this->extractOpenGraphData($html);

        // Use the user's URL as fallback
        if ($this->useFallbackMode && $page->url === null) {
            $page->url = $fallbackUrl;
        }

        return $page;
    }

    /**
     * Parses the markup, collects every "og:*" meta tag and maps the result onto an object.
     *
     * Meta tags are matched on both the "name" and the "property" attribute. In fallback
     * mode, a missing title/description is filled from the <title> element and the
     * description meta tag, when the page has them.
     *
     * @param string $content HTML markup to inspect.
     *
     * @return Website
     */
    private function extractOpenGraphData($content)
    {
        $crawler = new Crawler($content);

        $properties = [];
        foreach (['name', 'property'] as $t) {
            // Get all meta-tags starting with "og:"
            $ogMetaTags = $crawler->filter("meta[{$t}^='og:']");

            // Create clean property array
            $props = Linq::from($ogMetaTags)
                ->select(
                    function (\DOMElement $tag) use ($t) {
                        $name = strtolower(trim($tag->getAttribute($t)));
                        $value = trim($tag->getAttribute("content"));

                        return new Property($name, $value);
                    }
                )
                ->toArray();

            $properties = array_merge($properties, $props);
        }

        // Create new object of the correct type
        $typeProperty = Linq::from($properties)
            ->firstOrNull(
                function (Property $property) {
                    return $property->key === Property::TYPE;
                }
            );

        // Every type currently maps to Website; the switch is the place to add
        // dedicated object classes for other og:type values.
        switch ($typeProperty !== null ? $typeProperty->value : null) {
            default:
                $object = new Website();
                break;
        }

        // Assign all properties to the object
        $object->assignProperties($properties, $this->debug);

        // Fallback for title
        if ($this->useFallbackMode && !$object->title) {
            $titleElement = $crawler->filter("title")->first();
            // A Crawler object is always truthy, so the node count has to be checked
            // explicitly; text() throws on an empty node list.
            if (count($titleElement) > 0) {
                $object->title = trim($titleElement->text());
            }
        }

        // Fallback for description
        if ($this->useFallbackMode && !$object->description) {
            // The standard description tag is <meta name="description">; the "property"
            // variant is still accepted so pages that matched before keep matching.
            $descriptionElement = $crawler
                ->filter("meta[name='description'], meta[property='description']")
                ->first();
            // Same guard as for the title: attr() throws on an empty node list.
            if (count($descriptionElement) > 0) {
                $object->description = trim((string) $descriptionElement->attr("content"));
            }
        }

        return $object;
    }
}