jijijijiji

This commit is contained in:
Kubrik 2024-05-31 07:16:15 +02:00
parent 7350cb5662
commit ac5511f4f0

View file

@@ -1,12 +1,90 @@
import scrapy
class BlogSpider(scrapy.Spider):
    """Spider that starts at https://www.iata.csic.es/ and yields post titles."""

    name = 'blogspider'
    start_urls = ['https://www.iata.csic.es/']

    def parse(self, response):
        """Yield title items from ``response``, then requests for its ``a.next`` links.

        Every element matching ``.oxy-post-title`` produces a ``{'title': ...}``
        dict whose value is the result of ``::text`` + ``.get()`` on that
        element. Afterwards each ``a.next`` anchor (presumably the next-page
        link; confirm against the site markup) is followed, with this same
        method as the callback.
        """
        # Items first: one dict per matched title element.
        yield from (
            {'title': heading.css('::text').get()}
            for heading in response.css('.oxy-post-title')
        )
        # Then follow-up requests, parsed again by this method.
        yield from (
            response.follow(anchor, self.parse)
            for anchor in response.css('a.next')
        )
{
"cells": [
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "import scrapy\n",
  "\n",
  "\n",
  "class BlogSpider(scrapy.Spider):\n",
  "    \"\"\"Spider that starts at https://www.iata.csic.es/ and yields post titles.\"\"\"\n",
  "\n",
  "    name = 'blogspider'\n",
  "    start_urls = ['https://www.iata.csic.es/']\n",
  "\n",
  "    def parse(self, response):\n",
  "        \"\"\"Yield title items from `response`, then requests for its `a.next` links.\n",
  "\n",
  "        Each element matching `.oxy-post-title` produces a `{'title': ...}` dict\n",
  "        whose value is the result of `::text` + `.get()` on that element. Every\n",
  "        `a.next` anchor (presumably the next-page link; confirm against the\n",
  "        site markup) is then followed with this method as the callback.\n",
  "        \"\"\"\n",
  "        for title in response.css('.oxy-post-title'):\n",
  "            yield {'title': title.css('::text').get()}\n",
  "\n",
  "        for next_page in response.css('a.next'):\n",
  "            yield response.follow(next_page, self.parse)"
 ]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}