Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
MARIA CANTON RELLO
F1 Top 10 Drivers of the Year
Commits
d2126d8e
Commit
d2126d8e
authored
Jan 13, 2022
by
MARIA CANTON RELLO
Browse files
Upload New File
parent
ccc2d4c9
Changes
1
Hide whitespace changes
Inline
Side-by-side
F1Code.ipynb
0 → 100644
View file @
d2126d8e
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Untitled1.ipynb",
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "BReZ9FZS9eXu",
"outputId": "d416809f-1ded-4e3d-c054-60dd08b531f2"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: findspark in /usr/local/lib/python3.7/dist-packages (1.4.2)\n",
"Requirement already satisfied: pyspark in /usr/local/lib/python3.7/dist-packages (3.2.0)\n",
"Requirement already satisfied: py4j==0.10.9.2 in /usr/local/lib/python3.7/dist-packages (from pyspark) (0.10.9.2)\n",
"Requirement already satisfied: tweepy in /usr/local/lib/python3.7/dist-packages (3.10.0)\n",
"Requirement already satisfied: requests[socks]>=2.11.1 in /usr/local/lib/python3.7/dist-packages (from tweepy) (2.23.0)\n",
"Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.7/dist-packages (from tweepy) (1.15.0)\n",
"Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from tweepy) (1.3.0)\n",
"Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.7.0->tweepy) (3.1.1)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (2021.10.8)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (2.10)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (3.0.4)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (1.24.3)\n",
"Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (1.7.1)\n"
]
}
],
"source": [
"!pip install findspark #installing libraries\n",
"!pip install pyspark #installing libraries\n",
"!pip install tweepy"
]
},
{
"cell_type": "code",
"source": [
"import tweepy\n",
"\n",
"import pandas as pd\n",
"import re\n",
"\n",
"#Entorno apache spark\n",
"import findspark\n",
"\n",
"import pyspark as ps\n",
"import warnings\n",
"from pyspark.sql import SQLContext\n",
"\n",
"from pyspark.sql.functions import col, udf\n",
"from textblob import TextBlob\n",
"from datetime import datetime\n",
"\n",
"#Tratamiento de datos para entrenamiento\n",
"from pyspark.ml.feature import HashingTF, IDF, Tokenizer\n",
"from pyspark.ml.feature import StringIndexer\n",
"from pyspark.ml import Pipeline"
],
"metadata": {
"id": "3F67IX-49gMR"
},
"execution_count": 12,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#Creating the authentication object\n",
"auth = tweepy.OAuthHandler(\"wsJdwDXjGpvQwYsIMoa9Hlaoi\",\"znPcNjk7Zc10Q44VW7h1dpe8DvIwXRAzwM3V0zL4ZJAfEKRHxi\")\n",
"#Setting your access token and secret\n",
"auth.set_access_token(\"3401533569-veBBhYP3ow2p8ZaUkfrJ1xg9pfwYBlg3v47gGom\", \"zPwhVcB4iaxB0K6YEXU3ndzeJ0x1yYRMPas7JlNnUlc3o\")\n",
"#Creating the API object while passing in auth information\n",
"api = tweepy.API (auth, wait_on_rate_limit = True)"
],
"metadata": {
"id": "7now800d9iM4"
},
"execution_count": 13,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#Busqueda\n",
"search='Lando Norris'\n",
"\n",
"#Numero de tweets para descargar\n",
"num_tweets= 1000\n",
"\n",
"#Dia de busqueda\n",
"date_since=\"2021-12-13\""
],
"metadata": {
"id": "hZ21QQtF9lTi"
},
"execution_count": 14,
"outputs": []
},
{
"cell_type": "code",
"source": [
"\n",
"#Descarga de tweets\n",
"tweets = tweepy.Cursor(api.search,q = search, lang='en', since = date_since).items(num_tweets)\n",
"\n",
"#Inicializacion de las variables\n",
"positive=0\n",
"negative=0\n",
"neutral=0\n",
"sentiment=0\n",
"\n",
"#Limpiamos los tweets para dejarlo en texto plano\n",
"for tweet in tweets:\n",
" final_text= tweet.text.replace('RT', '')\n",
" if final_text.startswith(' @'):\n",
" position =final_text.index(':')\n",
" final_text=final_text[position+2:]\n",
" if final_text.startswith('@'):\n",
" position =final_text.index(' ')\n",
" final_text=final_text[position+2:]\n",
" analysis= TextBlob(final_text)\n",
" sentiment+=analysis.polarity #analiza el sentimiento de cada tweets\n",
" dm= analysis.polarity\n",
" \n",
" #Cuantificamos el sentimiento\n",
" if dm >0:\n",
" positive +=1\n",
" elif dm <0:\n",
" negative+=1\n",
" else:\n",
" neutral+=1\n",
"\n",
"#Imprimimos resultados \n",
"print(search)\n",
"print(sentiment)\n",
"print(positive)\n",
"print(negative)\n",
"print(neutral)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yRnhOfBq9nap",
"outputId": "224b4b7a-0ef7-48d9-cb7d-d4cc5a5bb681"
},
"execution_count": 15,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Lando Norris\n",
"31.97688334235211\n",
"127\n",
"45\n",
"828\n"
]
}
]
},
{
"cell_type": "code",
"source": [
""
],
"metadata": {
"id": "i-Cw7iEW9uh_"
},
"execution_count": 15,
"outputs": []
}
]
}
\ No newline at end of file
%% Cell type:code id: tags:
```
!pip install findspark #installing libraries
!pip install pyspark #installing libraries
!pip install tweepy
```
%% Output
Requirement already satisfied: findspark in /usr/local/lib/python3.7/dist-packages (1.4.2)
Requirement already satisfied: pyspark in /usr/local/lib/python3.7/dist-packages (3.2.0)
Requirement already satisfied: py4j==0.10.9.2 in /usr/local/lib/python3.7/dist-packages (from pyspark) (0.10.9.2)
Requirement already satisfied: tweepy in /usr/local/lib/python3.7/dist-packages (3.10.0)
Requirement already satisfied: requests[socks]>=2.11.1 in /usr/local/lib/python3.7/dist-packages (from tweepy) (2.23.0)
Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.7/dist-packages (from tweepy) (1.15.0)
Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from tweepy) (1.3.0)
Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.7.0->tweepy) (3.1.1)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (2021.10.8)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (2.10)
Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (3.0.4)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (1.24.3)
Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (1.7.1)
%% Cell type:code id: tags:
```
import tweepy
import pandas as pd
import re
#Entorno apache spark
import findspark
import pyspark as ps
import warnings
from pyspark.sql import SQLContext
from pyspark.sql.functions import col, udf
from textblob import TextBlob
from datetime import datetime
#Tratamiento de datos para entrenamiento
from pyspark.ml.feature import HashingTF, IDF, Tokenizer
from pyspark.ml.feature import StringIndexer
from pyspark.ml import Pipeline
```
%% Cell type:code id: tags:
```
# Twitter API credentials are read from environment variables so that no
# secret is stored in the notebook (and therefore in the repository).
# Any key that was ever committed in plain text should be revoked and
# regenerated in the Twitter developer portal.
import os

_CREDENTIAL_VARS = (
    "TWITTER_CONSUMER_KEY",
    "TWITTER_CONSUMER_SECRET",
    "TWITTER_ACCESS_TOKEN",
    "TWITTER_ACCESS_TOKEN_SECRET",
)

# Fail early with an explicit message instead of an opaque auth error later.
_missing_vars = [name for name in _CREDENTIAL_VARS if not os.environ.get(name)]
if _missing_vars:
    raise RuntimeError(
        "Missing Twitter API credentials; set the environment variable(s): "
        + ", ".join(_missing_vars)
    )

# Creating the authentication object
auth = tweepy.OAuthHandler(
    os.environ["TWITTER_CONSUMER_KEY"],
    os.environ["TWITTER_CONSUMER_SECRET"],
)
# Setting your access token and secret
auth.set_access_token(
    os.environ["TWITTER_ACCESS_TOKEN"],
    os.environ["TWITTER_ACCESS_TOKEN_SECRET"],
)
# Creating the API object while passing in auth information
api = tweepy.API(auth, wait_on_rate_limit=True)
```
%% Cell type:code id: tags:
```
# Search query
search = "Lando Norris"

# Number of tweets to download
num_tweets = 1000

# Date used as the `since` argument of the search
date_since = "2021-12-13"
```
%% Cell type:code id: tags:
```
def _clean_tweet_text(text):
    """Return *text* without a leading retweet header or reply mention.

    Handles the two prefixes the analysis wants to ignore:

    * ``"RT @user: message"`` -> ``"message"``
    * ``"@user message"``     -> ``"message"``

    Only the *leading* ``RT`` marker is removed, so words that merely
    contain "RT" are left intact. Text without the expected separator is
    returned as-is instead of raising ``ValueError``.
    """
    if text.startswith("RT @"):
        text = text[len("RT "):]
        header, colon, message = text.partition(":")
        # Only cut at the colon when it closes the "@user:" token itself,
        # not when it belongs to something later in the tweet (e.g. a URL).
        if colon and " " not in header:
            text = message.lstrip()
    if text.startswith("@"):
        # Drop the first mention; a tweet that is only "@user" becomes "".
        _mention, _space, message = text.partition(" ")
        text = message.lstrip()
    return text


# Download the tweets
# NOTE(review): api.search is the tweepy 3.x name of this endpoint; confirm
# before upgrading tweepy.
tweets = tweepy.Cursor(api.search, q=search, lang='en', since=date_since).items(num_tweets)

# Counters: number of tweets per class and the summed polarity
positive = 0
negative = 0
neutral = 0
sentiment = 0

# Reduce every tweet to plain text and score it
for tweet in tweets:
    final_text = _clean_tweet_text(tweet.text)
    analysis = TextBlob(final_text)
    dm = analysis.polarity  # polarity of this single tweet
    sentiment += dm

    # Classify by sign: > 0 positive, < 0 negative, exactly 0 neutral
    if dm > 0:
        positive += 1
    elif dm < 0:
        negative += 1
    else:
        neutral += 1

# Print the results: query, summed polarity, then the three counters
print(search)
print(sentiment)
print(positive)
print(negative)
print(neutral)
```
%% Output
Lando Norris
31.97688334235211
127
45
828
%% Cell type:code id: tags:
```
```
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment