vault backup: 2024-01-02 13:24:23

Affected files:
.obsidian/workspace.json
Corbia spider.md
This commit is contained in:
ed barz 2024-01-02 13:24:23 +01:00
parent 3f459c3929
commit 0fd245e063
2 changed files with 67 additions and 27 deletions

View File

@ -7,28 +7,6 @@
"id": "b7dbd1bdd3ec0467",
"type": "tabs",
"children": [
{
"id": "bf955a0f84453b93",
"type": "leaf",
"state": {
"type": "markdown",
"state": {
"file": "Weaviate.md",
"mode": "source",
"source": false
}
}
},
{
"id": "a86e2cbe66021c26",
"type": "leaf",
"state": {
"type": "release-notes",
"state": {
"currentVersion": "1.5.3"
}
}
},
{
"id": "f580618efb9aa5bc",
"type": "leaf",
@ -41,8 +19,7 @@
}
}
}
],
"currentTab": 2
]
}
],
"direction": "vertical"
@ -92,7 +69,8 @@
}
],
"direction": "horizontal",
"width": 208.5
"width": 208.5,
"collapsed": true
},
"right": {
"id": "36b6c2860c95bb3e",
@ -177,7 +155,8 @@
}
],
"direction": "horizontal",
"width": 371.5
"width": 371.5,
"collapsed": true
},
"left-ribbon": {
"hiddenItems": {
@ -190,7 +169,7 @@
"copilot:Copilot Chat": false
}
},
"active": "f580618efb9aa5bc",
"active": "20bc2a10460c7a9a",
"lastOpenFiles": [
"Corbia spider.md",
"Docker.md",

View File

@ -45,5 +45,66 @@ def scrape_domain(self):
return json.dumps({"success": False})
```
For each page processed by the domain-based function, the `BeautifulSoup` object is accessible via `self.soup`.
```html
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Venmo truffaut shabby chic organic</title>
</head>
<body>
<div id="root">
<div class="top">
<h2>
<ul>
<li><a href="/01">01</a></li>
<li><a href="/02">02</a></li>
<li><a href="/03">03</a></li>
<li><a href="/04">04</a></li>
</ul>
</h2>
</div>
<div class="article main">
<h2 class="text">Venmo truffaut shabby chic organic</h2>
<section class="text">
<p>I'm baby wayfarers tote bag gochujang cred food truck VHS quinoa kogi Brooklyn yr vegan etsy.</p>
<p>Portland squid DSA, raclette flannel pinterest craft beer cloud bread pour-over same.</p>
<p>Air plant pickled man braid tilde drinking vinegar ascot DIY poke meditation iceland JOMO sustainable. Hell of tbh kombucha +1 listicle.</p>
</section>
</div>
<div class="footer">
<ul>
<li><a href="/01">01</a></li>
<li><a href="/02">02</a></li>
<li><a href="/03">03</a></li>
<li><a href="/04">04</a></li>
</ul>
<p class="text"></p>
</div>
</div>
</body>
</html>
```
```python
def scrape_domain(self):
    """Extract the page title from ``self.soup`` and return it as JSON.

    Returns:
        A JSON string. On success it holds the stripped text of the
        ``<title>`` element under ``"title"`` together with
        ``"success": true``. If anything goes wrong (for example the page
        has no ``<title>``, so ``find`` yields ``None``), it holds only
        ``"success": false``.
    """
    try:
        title = self.soup.find('title')
        data = {
            # strip() must be *called*; the bare attribute is a bound
            # method, which json.dumps cannot serialise.
            "title": title.text.strip(),
            "success": True,
        }
        return json.dumps(data, indent=4, ensure_ascii=False)
    except Exception:
        # Best-effort scraper: report failure to the caller instead of
        # propagating the error.
        return json.dumps({"success": False})
```