Skip to content

Commit 49e1628

Browse files
Update README.md
1 parent 136fc51 commit 49e1628

1 file changed

Lines changed: 42 additions & 26 deletions

File tree

README.md

Lines changed: 42 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,29 +4,33 @@ Most of the PHP parsers I encountered in the past were either too complicated, *
44

55
# pQuery Web Scraper tutorial
66
## Getting started
7-
To start coding with pQuery, just include the main php file and create a Web Scraper class object and send the parameters through the constructor, it will know if you sent a URL or an HTML string:
8-
7+
To start coding with pQuery, simply include the main PHP file from this repository and initialize an object of the class like this:
98
```php
9+
// include webscraper.php file
1010
include "path/webscraper.php";
11-
12-
$doc = new WebScraper("https://www.examplelink.com");
13-
11+
// create a new object
12+
$doc = new WebScraper();
13+
```
14+
In case you want to load a string containing your HTML or XML:
15+
```php
16+
$doc->loadHTML($html);
1417
// or
15-
16-
$doc = new WebScraper("<!DOCTYPE html><html><body>".$yourhtml."</body></html>");
17-
18-
// both work
18+
$doc->loadXML($xml);
1919
```
20-
And then, you can "echo" your parsed web page by using the `echo` function:
21-
20+
And when you finish parsing your document, you can "echo" your parsed web page by calling the `echo` method:
2221
```php
23-
2422
$doc->echo();
25-
2623
```
24+
But, for now, we will be using this initialization:
25+
```php
26+
include "path/webscraper.php";
27+
$doc = new WebScraper();
28+
$doc->loadHTML($html);
2729

28-
Simple as that!
30+
// code
2931

32+
$doc->echo();
33+
```
3034
## How do I select nodes within my HTML document?
3135
For this, we use `query`, or its simplified version, `Q`. As its parameter, we can pass in a string with the CSS query we want, for example: `$doc->Q("div.box > span#tooltip")`.
3236

@@ -131,7 +135,8 @@ Wrap or unwrap node elements with other node elements.
131135

132136
```php
133137
include "path/webscraper.php";
134-
$doc = new WebScraper("<!DOCTYPE html><html><body>".$html."</body></html>");
138+
$doc = new WebScraper();
139+
$doc->loadHTML($html);
135140

136141
$doc->Q("img[src='image.jpg']")->wrap("<figure></figure>");
137142
// also possible: $doc->Q("img[src='image.jpg']")->wrap("figure");
@@ -151,7 +156,8 @@ You may also give the image wrapper attributes, like `style`, `class`, `id`, etc
151156

152157
```php
153158
include "path/webscraper.php";
154-
$doc = new WebScraper("<!DOCTYPE html><html><body>".$html."</body></html>");
159+
$doc = new WebScraper();
160+
$doc->loadHTML($html);
155161

156162
$doc->Q("img[src='image.jpg']")->wrap("<figure class='img-wrapper' style='width: 100px; height: 100px;'></figure>");
157163

@@ -168,7 +174,8 @@ In case you don't want it wrapped anymore, run it:
168174

169175
```php
170176
include "path/webscraper.php";
171-
$doc = new WebScraper("<!DOCTYPE html><html><body>".$html."</body></html>");
177+
$doc = new WebScraper();
178+
$doc->loadHTML($html);
172179

173180
$doc->Q("img[src='image.jpg']")->unwrap();
174181

@@ -193,7 +200,8 @@ Add and remove class to DOM elements.
193200

194201
```php
195202
include "path/webscraper.php";
196-
$doc = new WebScraper("<!DOCTYPE html><html><body>".$html."</body></html>");
203+
$doc = new WebScraper();
204+
$doc->loadHTML($html);
197205

198206
$doc->Q("h1")->addClass("title");
199207
$doc->Q("h2")->removeClass("title");
@@ -227,7 +235,8 @@ In case `addClass` and `removeClass` are not enough (and probably are not), you
227235

228236
```php
229237
include "path/webscraper.php";
230-
$doc = new WebScraper("<!DOCTYPE html><html><body>".$html."</body></html>");
238+
$doc = new WebScraper();
239+
$doc->loadHTML($html);
231240

232241
$doc->Q("form input[name='name']")->setAttribute("id", "name");
233242
$doc->Q("form input[name='name']")->removeAttribute("tabindex");
@@ -267,7 +276,8 @@ $doc->echo();
267276

268277
```php
269278
include "path/webscraper.php";
270-
$doc = new WebScraper("<!DOCTYPE html><html><body>".$html."</body></html>");
279+
$doc = new WebScraper();
280+
$doc->loadHTML($html);
271281

272282
echo "Result from html(): \n";
273283
echo $doc->Q("nav")->html();
@@ -306,7 +316,8 @@ Result from text():
306316

307317
```php
308318
include "path/webscraper.php";
309-
$doc = new WebScraper("<!DOCTYPE html><html><body>".$html."</body></html>");
319+
$doc = new WebScraper();
320+
$doc->loadHTML($html);
310321

311322
$doc->Q("nav")->html('
312323
<ul>
@@ -353,7 +364,8 @@ $doc->echo();
353364

354365
```php
355366
include "path/webscraper.php";
356-
$doc = new WebScraper("<!DOCTYPE html><html><body>".$html."</body></html>");
367+
$doc = new WebScraper();
368+
$doc->loadHTML($html);
357369

358370
$i = 0;
359371
while ($i < 5) {
@@ -404,7 +416,8 @@ These functions are built to test if an element has a specific class/attribute o
404416

405417
```php
406418
include "path/webscraper.php";
407-
$doc = new WebScraper("<!DOCTYPE html><html><body>".$html."</body></html>");
419+
$doc = new WebScraper();
420+
$doc->loadHTML($html);
408421

409422
if ($doc->Q("p[1]")->hasClass("rice")) {
410423
echo "p[1] has class \"rice\": true \n\n";
@@ -455,7 +468,8 @@ It accepts a boolean as a parameter: `true` or `false`, `true` tells it to keep
455468

456469
```php
457470
include "path/webscraper.php";
458-
$doc = new WebScraper("<!DOCTYPE html><html>".$html."</html>");
471+
$doc = new WebScraper();
472+
$doc->loadHTML($html);
459473

460474
$doc->Q("style")->delete();
461475

@@ -477,7 +491,8 @@ In the other hand...
477491

478492
```php
479493
include "path/webscraper.php";
480-
$doc = new WebScraper("<!DOCTYPE html><html>".$html."</html>");
494+
$doc = new WebScraper();
495+
$doc->loadHTML($html);
481496

482497
$doc->Q("p")->delete(true);
483498

@@ -516,7 +531,8 @@ These functions are built to test if an element has a specific class/attribute o
516531

517532
```php
518533
include "path/webscraper.php";
519-
$doc = new WebScraper("<!DOCTYPE html><html><body>".$html."</body></html>");
534+
$doc = new WebScraper();
535+
$doc->loadHTML($html);
520536

521537
if ($doc->Q("p[1]")->hasClass("rice")) {
522538
echo "p[1] has class \"rice\": true \n\n";

0 commit comments

Comments
 (0)