Commit 4cbe73d6 authored by MARIA CANTON RELLO
Browse files

Delete F1Code.ipynb

parent d2126d8e
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Untitled1.ipynb",
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "BReZ9FZS9eXu",
"outputId": "d416809f-1ded-4e3d-c054-60dd08b531f2"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: findspark in /usr/local/lib/python3.7/dist-packages (1.4.2)\n",
"Requirement already satisfied: pyspark in /usr/local/lib/python3.7/dist-packages (3.2.0)\n",
"Requirement already satisfied: py4j==0.10.9.2 in /usr/local/lib/python3.7/dist-packages (from pyspark) (0.10.9.2)\n",
"Requirement already satisfied: tweepy in /usr/local/lib/python3.7/dist-packages (3.10.0)\n",
"Requirement already satisfied: requests[socks]>=2.11.1 in /usr/local/lib/python3.7/dist-packages (from tweepy) (2.23.0)\n",
"Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.7/dist-packages (from tweepy) (1.15.0)\n",
"Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from tweepy) (1.3.0)\n",
"Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.7.0->tweepy) (3.1.1)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (2021.10.8)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (2.10)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (3.0.4)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (1.24.3)\n",
"Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (1.7.1)\n"
]
}
],
"source": [
"# Install the notebook's dependencies into the kernel's own environment.\n",
"# Versions are pinned to the ones recorded in this cell's last output.\n",
"# tweepy has to stay on 3.x: the download cell calls API.search, which later\n",
"# tweepy releases expose under the name API.search_tweets instead.\n",
"# textblob is imported by the next cell, so it is installed here as well.\n",
"%pip install findspark==1.4.2\n",
"%pip install pyspark==3.2.0\n",
"%pip install tweepy==3.10.0\n",
"%pip install textblob"
]
},
{
"cell_type": "code",
"source": [
"import tweepy\n",
"\n",
"import pandas as pd\n",
"import re\n",
"\n",
"#Entorno apache spark\n",
"import findspark\n",
"\n",
"import pyspark as ps\n",
"import warnings\n",
"from pyspark.sql import SQLContext\n",
"\n",
"from pyspark.sql.functions import col, udf\n",
"from textblob import TextBlob\n",
"from datetime import datetime\n",
"\n",
"#Tratamiento de datos para entrenamiento\n",
"from pyspark.ml.feature import HashingTF, IDF, Tokenizer\n",
"from pyspark.ml.feature import StringIndexer\n",
"from pyspark.ml import Pipeline"
],
"metadata": {
"id": "3F67IX-49gMR"
},
"execution_count": 12,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Build the Twitter API client.\n",
"# Credentials are read from environment variables (with a hidden prompt as a\n",
"# fallback) instead of being written into the notebook, so they never end up\n",
"# in version control or in a shared copy of this file.\n",
"# NOTE(review): the key/token pairs that used to be hardcoded in this cell were\n",
"# published together with the notebook; revoke and regenerate them.\n",
"import os\n",
"from getpass import getpass\n",
"\n",
"\n",
"def _credential(name):\n",
"    \"\"\"Return credential `name` from the environment, prompting with hidden input when it is unset or empty.\"\"\"\n",
"    return os.environ.get(name) or getpass(name + ': ')\n",
"\n",
"\n",
"# Creating the authentication object\n",
"auth = tweepy.OAuthHandler(\n",
"    _credential('TWITTER_CONSUMER_KEY'),\n",
"    _credential('TWITTER_CONSUMER_SECRET'),\n",
")\n",
"# Setting your access token and secret\n",
"auth.set_access_token(\n",
"    _credential('TWITTER_ACCESS_TOKEN'),\n",
"    _credential('TWITTER_ACCESS_TOKEN_SECRET'),\n",
")\n",
"# Creating the API object while passing in auth information\n",
"api = tweepy.API(auth, wait_on_rate_limit=True)"
],
"metadata": {
"id": "7now800d9iM4"
},
"execution_count": 13,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#Busqueda\n",
"search='Lando Norris'\n",
"\n",
"#Numero de tweets para descargar\n",
"num_tweets= 1000\n",
"\n",
"#Dia de busqueda\n",
"date_since=\"2021-12-13\""
],
"metadata": {
"id": "hZ21QQtF9lTi"
},
"execution_count": 14,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import re\n",
"\n",
"# \"RT @user: \" marker found in front of a retweet's text.\n",
"_RETWEET_PREFIX = re.compile(r'^RT\\s+@\\w+:?\\s*')\n",
"# One or more \"@user\" mentions at the very start of the text (replies).\n",
"_LEADING_MENTIONS = re.compile(r'^(?:@\\w+\\s*)+')\n",
"\n",
"\n",
"def _clean_tweet_text(text):\n",
"    \"\"\"Return `text` without its leading retweet marker and leading @mentions.\n",
"\n",
"    Only the start of the text is touched, so an 'RT' inside a word is kept,\n",
"    and a text without ':' or without a space is handled instead of raising\n",
"    ValueError.\n",
"    \"\"\"\n",
"    text = _RETWEET_PREFIX.sub('', text.strip())\n",
"    return _LEADING_MENTIONS.sub('', text).strip()\n",
"\n",
"\n",
"# Download the tweets\n",
"tweets = tweepy.Cursor(api.search, q=search, lang='en', since=date_since).items(num_tweets)\n",
"\n",
"# Initialise the counters\n",
"positive = 0\n",
"negative = 0\n",
"neutral = 0\n",
"sentiment = 0\n",
"\n",
"# Reduce every tweet to plain text and score it\n",
"for tweet in tweets:\n",
"    final_text = _clean_tweet_text(tweet.text)\n",
"    analysis = TextBlob(final_text)\n",
"    dm = analysis.polarity  # polarity of this tweet, computed once\n",
"    sentiment += dm  # running sum of the polarities\n",
"\n",
"    # Count the tweet as positive, negative or neutral by the sign of its polarity\n",
"    if dm > 0:\n",
"        positive += 1\n",
"    elif dm < 0:\n",
"        negative += 1\n",
"    else:\n",
"        neutral += 1\n",
"\n",
"# Print the results: query, summed polarity, then positive / negative / neutral counts\n",
"print(search)\n",
"print(sentiment)\n",
"print(positive)\n",
"print(negative)\n",
"print(neutral)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yRnhOfBq9nap",
"outputId": "224b4b7a-0ef7-48d9-cb7d-d4cc5a5bb681"
},
"execution_count": 15,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Lando Norris\n",
"31.97688334235211\n",
"127\n",
"45\n",
"828\n"
]
}
]
},
{
"cell_type": "code",
"source": [
""
],
"metadata": {
"id": "i-Cw7iEW9uh_"
},
"execution_count": 15,
"outputs": []
}
]
}
\ No newline at end of file
%% Cell type:code id: tags:
```
!pip install findspark #installing libraries
!pip install pyspark #installing libraries
!pip install tweepy
```
%%%% Output: stream
Requirement already satisfied: findspark in /usr/local/lib/python3.7/dist-packages (1.4.2)
Requirement already satisfied: pyspark in /usr/local/lib/python3.7/dist-packages (3.2.0)
Requirement already satisfied: py4j==0.10.9.2 in /usr/local/lib/python3.7/dist-packages (from pyspark) (0.10.9.2)
Requirement already satisfied: tweepy in /usr/local/lib/python3.7/dist-packages (3.10.0)
Requirement already satisfied: requests[socks]>=2.11.1 in /usr/local/lib/python3.7/dist-packages (from tweepy) (2.23.0)
Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.7/dist-packages (from tweepy) (1.15.0)
Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from tweepy) (1.3.0)
Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.7.0->tweepy) (3.1.1)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (2021.10.8)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (2.10)
Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (3.0.4)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (1.24.3)
Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (1.7.1)
%% Cell type:code id: tags:
```
import tweepy
import pandas as pd
import re
#Entorno apache spark
import findspark
import pyspark as ps
import warnings
from pyspark.sql import SQLContext
from pyspark.sql.functions import col, udf
from textblob import TextBlob
from datetime import datetime
#Tratamiento de datos para entrenamiento
from pyspark.ml.feature import HashingTF, IDF, Tokenizer
from pyspark.ml.feature import StringIndexer
from pyspark.ml import Pipeline
```
%% Cell type:code id: tags:
```
# Build the Twitter API client.
# Credentials are read from environment variables (with a hidden prompt as a
# fallback) instead of being written into the notebook, so they never end up
# in version control or in a shared copy of this file.
# NOTE(review): the key/token pairs that used to be hardcoded in this cell were
# published together with the notebook; revoke and regenerate them.
import os
from getpass import getpass


def _credential(name):
    """Return credential `name` from the environment, prompting with hidden input when it is unset or empty."""
    return os.environ.get(name) or getpass(name + ': ')


# Creating the authentication object
auth = tweepy.OAuthHandler(
    _credential('TWITTER_CONSUMER_KEY'),
    _credential('TWITTER_CONSUMER_SECRET'),
)
# Setting your access token and secret
auth.set_access_token(
    _credential('TWITTER_ACCESS_TOKEN'),
    _credential('TWITTER_ACCESS_TOKEN_SECRET'),
)
# Creating the API object while passing in auth information
api = tweepy.API(auth, wait_on_rate_limit=True)
```
%% Cell type:code id: tags:
```
# Search parameters read by the tweet-download cell.
date_since = "2021-12-13"   # passed to the search as its `since` date
num_tweets = 1000           # number of tweets to download
search = 'Lando Norris'     # search query
```
%% Cell type:code id: tags:
```
import re

# "RT @user: " marker found in front of a retweet's text.
_RETWEET_PREFIX = re.compile(r'^RT\s+@\w+:?\s*')
# One or more "@user" mentions at the very start of the text (replies).
_LEADING_MENTIONS = re.compile(r'^(?:@\w+\s*)+')


def _clean_tweet_text(text):
    """Return `text` without its leading retweet marker and leading @mentions.

    Only the start of the text is touched, so an 'RT' inside a word is kept,
    and a text without ':' or without a space is handled instead of raising
    ValueError.
    """
    text = _RETWEET_PREFIX.sub('', text.strip())
    return _LEADING_MENTIONS.sub('', text).strip()


# Download the tweets
tweets = tweepy.Cursor(api.search, q=search, lang='en', since=date_since).items(num_tweets)

# Initialise the counters
positive = 0
negative = 0
neutral = 0
sentiment = 0

# Reduce every tweet to plain text and score it
for tweet in tweets:
    final_text = _clean_tweet_text(tweet.text)
    analysis = TextBlob(final_text)
    dm = analysis.polarity  # polarity of this tweet, computed once
    sentiment += dm  # running sum of the polarities

    # Count the tweet as positive, negative or neutral by the sign of its polarity
    if dm > 0:
        positive += 1
    elif dm < 0:
        negative += 1
    else:
        neutral += 1

# Print the results: query, summed polarity, then positive / negative / neutral counts
print(search)
print(sentiment)
print(positive)
print(negative)
print(neutral)
```
%%%% Output: stream
Lando Norris
31.97688334235211
127
45
828
%% Cell type:code id: tags:
```
```
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment