vault backup: 2024-01-02 13:24:23
Affected files: .obsidian/workspace.json Corbia spider.md
This commit is contained in:
parent
3f459c3929
commit
0fd245e063
|
@ -7,28 +7,6 @@
|
||||||
"id": "b7dbd1bdd3ec0467",
|
"id": "b7dbd1bdd3ec0467",
|
||||||
"type": "tabs",
|
"type": "tabs",
|
||||||
"children": [
|
"children": [
|
||||||
{
|
|
||||||
"id": "bf955a0f84453b93",
|
|
||||||
"type": "leaf",
|
|
||||||
"state": {
|
|
||||||
"type": "markdown",
|
|
||||||
"state": {
|
|
||||||
"file": "Weaviate.md",
|
|
||||||
"mode": "source",
|
|
||||||
"source": false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "a86e2cbe66021c26",
|
|
||||||
"type": "leaf",
|
|
||||||
"state": {
|
|
||||||
"type": "release-notes",
|
|
||||||
"state": {
|
|
||||||
"currentVersion": "1.5.3"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"id": "f580618efb9aa5bc",
|
"id": "f580618efb9aa5bc",
|
||||||
"type": "leaf",
|
"type": "leaf",
|
||||||
|
@ -41,8 +19,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
],
|
]
|
||||||
"currentTab": 2
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"direction": "vertical"
|
"direction": "vertical"
|
||||||
|
@ -92,7 +69,8 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"direction": "horizontal",
|
"direction": "horizontal",
|
||||||
"width": 208.5
|
"width": 208.5,
|
||||||
|
"collapsed": true
|
||||||
},
|
},
|
||||||
"right": {
|
"right": {
|
||||||
"id": "36b6c2860c95bb3e",
|
"id": "36b6c2860c95bb3e",
|
||||||
|
@ -177,7 +155,8 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"direction": "horizontal",
|
"direction": "horizontal",
|
||||||
"width": 371.5
|
"width": 371.5,
|
||||||
|
"collapsed": true
|
||||||
},
|
},
|
||||||
"left-ribbon": {
|
"left-ribbon": {
|
||||||
"hiddenItems": {
|
"hiddenItems": {
|
||||||
|
@ -190,7 +169,7 @@
|
||||||
"copilot:Copilot Chat": false
|
"copilot:Copilot Chat": false
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"active": "f580618efb9aa5bc",
|
"active": "20bc2a10460c7a9a",
|
||||||
"lastOpenFiles": [
|
"lastOpenFiles": [
|
||||||
"Corbia spider.md",
|
"Corbia spider.md",
|
||||||
"Docker.md",
|
"Docker.md",
|
||||||
|
|
|
@ -45,5 +45,66 @@ def scrape_domain(self):
|
||||||
return json.dumps({"success": False})
|
return json.dumps({"success": False})
|
||||||
```
|
```
|
||||||
|
|
||||||
|
For each page processed by the domain-based function, the `BeautifulSoup` object is accessible via `self.soup`.
|
||||||
|
|
||||||
|
```html
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Venmo truffaut shabby chic organic</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div id="root">
|
||||||
|
|
||||||
|
<div class="top">
|
||||||
|
<h2>
|
||||||
|
<ul>
|
||||||
|
<li><a href="/01">01</a></li>
|
||||||
|
<li><a href="/02">02</a></li>
|
||||||
|
<li><a href="/03">03</a></li>
|
||||||
|
<li><a href="/04">04</a></li>
|
||||||
|
</ul>
|
||||||
|
</h2>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="article main">
|
||||||
|
<h2 class="text">Venmo truffaut shabby chic organic</h2>
|
||||||
|
|
||||||
|
<section class="text">
|
||||||
|
<p>I'm baby wayfarers tote bag gochujang cred food truck VHS quinoa kogi Brooklyn yr vegan etsy.</p>
|
||||||
|
<p>Portland squid DSA, raclette flannel pinterest craft beer cloud bread pour-over same.</p>
|
||||||
|
<p>Air plant pickled man braid tilde drinking vinegar ascot DIY poke meditation iceland JOMO sustainable. Hell of tbh kombucha +1 listicle.</p>
|
||||||
|
</section>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="footer">
|
||||||
|
<ul>
|
||||||
|
<li><a href="/01">01</a></li>
|
||||||
|
<li><a href="/02">02</a></li>
|
||||||
|
<li><a href="/03">03</a></li>
|
||||||
|
<li><a href="/04">04</a></li>
|
||||||
|
</ul>
|
||||||
|
<p class="text"></p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
```python
|
||||||
|
def scrape_domain(self):
    """Extract the page title from ``self.soup`` and return it as JSON.

    Returns:
        A JSON string. On success it holds the stripped text of the
        page's ``<title>`` element under ``"title"`` together with
        ``"success": true``. If anything goes wrong (for example the page
        has no ``<title>``, so ``find`` yields ``None``), it is
        ``{"success": false}``.
    """
    try:
        title = self.soup.find('title')
        data = {
            # strip must be *called*: the bare attribute is a bound method,
            # which json.dumps cannot serialize.
            "title": title.text.strip(),
            "success": True,
        }
        return json.dumps(data, indent=4, ensure_ascii=False)
    except Exception:
        # Best-effort scraper: report failure to the caller instead of raising.
        return json.dumps({"success": False})
|
||||||
|
```
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue