vault backup: 2024-01-02 13:24:23
Affected files: .obsidian/workspace.json Corbia spider.md
This commit is contained in:
parent
3f459c3929
commit
0fd245e063
|
@ -7,28 +7,6 @@
|
|||
"id": "b7dbd1bdd3ec0467",
|
||||
"type": "tabs",
|
||||
"children": [
|
||||
{
|
||||
"id": "bf955a0f84453b93",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "markdown",
|
||||
"state": {
|
||||
"file": "Weaviate.md",
|
||||
"mode": "source",
|
||||
"source": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "a86e2cbe66021c26",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "release-notes",
|
||||
"state": {
|
||||
"currentVersion": "1.5.3"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "f580618efb9aa5bc",
|
||||
"type": "leaf",
|
||||
|
@ -41,8 +19,7 @@
|
|||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"currentTab": 2
|
||||
]
|
||||
}
|
||||
],
|
||||
"direction": "vertical"
|
||||
|
@ -92,7 +69,8 @@
|
|||
}
|
||||
],
|
||||
"direction": "horizontal",
|
||||
"width": 208.5
|
||||
"width": 208.5,
|
||||
"collapsed": true
|
||||
},
|
||||
"right": {
|
||||
"id": "36b6c2860c95bb3e",
|
||||
|
@ -177,7 +155,8 @@
|
|||
}
|
||||
],
|
||||
"direction": "horizontal",
|
||||
"width": 371.5
|
||||
"width": 371.5,
|
||||
"collapsed": true
|
||||
},
|
||||
"left-ribbon": {
|
||||
"hiddenItems": {
|
||||
|
@ -190,7 +169,7 @@
|
|||
"copilot:Copilot Chat": false
|
||||
}
|
||||
},
|
||||
"active": "f580618efb9aa5bc",
|
||||
"active": "20bc2a10460c7a9a",
|
||||
"lastOpenFiles": [
|
||||
"Corbia spider.md",
|
||||
"Docker.md",
|
||||
|
|
|
@ -45,5 +45,66 @@ def scrape_domain(self):
|
|||
return json.dumps({"success": False})
|
||||
```
|
||||
|
||||
For each page processed by the domain-based function, the `BeautifulSoup` object is accessible via `self.soup`.
|
||||
|
||||
```html
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Venmo truffaut shabby chic organic</title>
|
||||
</head>
|
||||
<body>
|
||||
<div id="root">
|
||||
|
||||
<div class="top">
|
||||
<h2>
|
||||
<ul>
|
||||
<li><a href="/01">01</a></li>
|
||||
<li><a href="/02">02</a></li>
|
||||
<li><a href="/03">03</a></li>
|
||||
<li><a href="/04">04</a></li>
|
||||
</ul>
|
||||
</h2>
|
||||
</div>
|
||||
|
||||
<div class="article main">
|
||||
<h2 class="text">Venmo truffaut shabby chic organic</h2>
|
||||
|
||||
<section class="text">
|
||||
<p>I'm baby wayfarers tote bag gochujang cred food truck VHS quinoa kogi Brooklyn yr vegan etsy.</p>
|
||||
<p>Portland squid DSA, raclette flannel pinterest craft beer cloud bread pour-over same.</p>
|
||||
<p>Air plant pickled man braid tilde drinking vinegar ascot DIY poke meditation iceland JOMO sustainable. Hell of tbh kombucha +1 listicle.</p>
|
||||
</section>
|
||||
</div>
|
||||
|
||||
<div class="footer">
|
||||
<ul>
|
||||
<li><a href="/01">01</a></li>
|
||||
<li><a href="/02">02</a></li>
|
||||
<li><a href="/03">03</a></li>
|
||||
<li><a href="/04">04</a></li>
|
||||
</ul>
|
||||
<p class="text"></p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
```
|
||||
|
||||
|
||||
|
||||
```python
|
||||
def scrape_domain(self):
    """Extract the page title from ``self.soup`` and return it as JSON.

    Returns:
        str: A JSON document containing ``"title"`` (the whitespace-stripped
        text of the ``<title>`` element) and ``"success": true``. If the
        title cannot be extracted, ``{"success": false}`` is returned
        instead of raising.
    """
    try:
        title = self.soup.find('title')
        data = {
            # strip must be *called*: a bare method reference is not
            # JSON-serializable and would force the failure path every time.
            "title": title.text.strip(),
            "success": True,
        }
        return json.dumps(data, indent=4, ensure_ascii=False)
    except Exception:
        # Best-effort contract: any failure (for example, the lookup
        # yielding None because the page has no <title>) is reported as a
        # JSON failure marker rather than propagated to the caller.
        return json.dumps({"success": False})
|
||||
```
|
||||
|
||||
|
|
Loading…
Reference in New Issue