Commit 42ae6b9b authored by MARIO LLESTA MUÑOZ
Browse files

TwitterCrawler.java

parent d387b823
package packageTwitter;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import kong.unirest.Callback;
import kong.unirest.GetRequest;
import kong.unirest.Headers;
import kong.unirest.HttpResponse;
import kong.unirest.Unirest;
import kong.unirest.UnirestException;
import com.google.gson.Gson;
import com.vdurmont.emoji.EmojiParser;
import packageTwitter.Search;
import packageTwitter.Tweet;
/**
 * Asynchronous crawler for the Twitter v1.1 standard search endpoint.
 *
 * <p>Tweets are requested page by page through Unirest's asynchronous API. Each
 * response is delivered to {@link #completed(HttpResponse)}, which stores the
 * page and either issues the next request (paging backwards with {@code max_id})
 * or wakes up the thread blocked in {@link #requestTweets()}.
 *
 * <p>To perform data analysis only the text of each Tweet and its author are
 * needed, so {@link #main(String[])} saves the collected Tweets to text files
 * for later analysis.
 */
public class TwitterCrawler implements Callback<String> {

    private static final String bearer_token = "XXXXXXXXXXXXXXXXX";

    /*
     * Page size sent as the "count" parameter. The search endpoint documents a
     * maximum of 100 tweets per page; 450 is the number of requests allowed per
     * 15-minute window, not a page size.
     * NOTE(review): value taken from the public API reference - confirm for the
     * access tier in use.
     */
    private static final int MAX_TWEET_COUNT_PER_REQUEST = 100;

    /* Lowest tweet id seen so far; the next page is requested below it. */
    private long max_id = Long.MAX_VALUE;
    /* Total number of tweets the caller asked for. */
    private long tweetcount = 0;
    /* Search query sent to the API. */
    private String screen_name = "";
    /* Tweets still to be received before the crawl is complete. */
    private long pending = 0;

    /* Number of tweets asked for in the request currently in flight. */
    long request_twetcount = 0;
    /* Number of tweets delivered by the last response. */
    long tweets_obtained = 0;
    /* Every tweet received so far, across all pages. */
    List<Tweet> tweet_list_total = new ArrayList<Tweet>();
    /* True once the crawl has ended; guarded by wait_outer. */
    boolean finish = false;
    /* Held by requestTweets() for the whole crawl; main() locks it while saving. */
    Object wait_inner = new Object();
    /* Monitor used to hand the "crawl finished" signal to requestTweets(). */
    Object wait_outer = new Object();

    /**
     * Creates a crawler.
     *
     * @param count total number of tweets to collect
     * @param query search query sent to the API
     * @throws IllegalArgumentException if {@code query} is {@code null}
     */
    public TwitterCrawler(int count, String query) {
        if (query == null) {
            throw new IllegalArgumentException("query must not be null");
        }
        this.tweetcount = count;
        this.screen_name = query;
    }

    /**
     * Handles one page of results: stores it and either requests the next page
     * or ends the crawl.
     *
     * <p>Any runtime failure while processing the response also ends the crawl,
     * because otherwise the thread blocked in {@link #requestTweets()} would
     * never be woken up.
     *
     * @param response raw HTTP response whose body is the JSON search result
     */
    @Override
    public void completed(HttpResponse<String> response) {
        boolean morePagesWanted;
        try {
            morePagesWanted = storePage(response);
            if (morePagesWanted) {
                nextRequest();
            }
        } catch (RuntimeException e) {
            System.out.println("The response could not be processed \n" + e.getMessage());
            finish();
            return;
        }
        if (!morePagesWanted) {
            System.out.println("Hemos finalizado de pedir tweets");
            finish();
        }
    }

    /**
     * Parses one response, appends its tweets to the total list and updates the
     * paging state.
     *
     * @param response response delivered by the HTTP client
     * @return {@code true} if another page must be requested, {@code false} if
     *         the crawl is complete
     */
    private boolean storePage(HttpResponse<String> response) {
        logRateLimit(response.getHeaders());

        Gson gson = new Gson();
        Search search = gson.fromJson(response.getBody(), Search.class);

        /* An empty body or an error payload carries no "statuses" array. */
        List<Tweet> page = new ArrayList<>();
        if (search != null && search.statuses != null) {
            page.addAll(Arrays.asList(search.statuses));
        }

        tweets_obtained = page.size();
        System.out.println("Recibidos " + tweets_obtained + " tweets");

        /* Account for what was received and keep it. */
        pending -= tweets_obtained;
        tweet_list_total.addAll(page);

        /* STOP: the total number of tweets requested has been received. */
        if (pending <= 0) {
            return false;
        }
        /* STOP: a short page means there is nothing more to fetch. */
        if (tweets_obtained < request_twetcount) {
            return false;
        }

        /* Continue below the oldest (smallest id) tweet seen so far. */
        for (Tweet tw : page) {
            if (tw.id < max_id) {
                max_id = tw.id;
            }
        }
        return true;
    }

    /**
     * Prints the rate-limit headers of a response. A missing or malformed reset
     * header is tolerated: the reset countdown is simply not printed.
     *
     * @param headers headers of the response just received
     */
    private void logRateLimit(Headers headers) {
        String limit = headers.getFirst("x-rate-limit-limit");
        String remaining = headers.getFirst("x-rate-limit-remaining");
        String reset = headers.getFirst("x-rate-limit-reset");
        System.out.println("Tienes un lmite total de " + limit);
        System.out.println("Puedes hacer " + remaining + " durante la ventana de 15 minutos");
        if (reset == null || reset.trim().isEmpty()) {
            return;
        }
        try {
            long now_seconds = Instant.now().toEpochMilli() / 1000;
            long reset_seconds = Long.parseLong(reset.trim()) - now_seconds;
            System.out.println("La ventana se resetea en " + reset_seconds + " segundos");
        } catch (NumberFormatException e) {
            System.out.println("Unparseable x-rate-limit-reset header: " + reset);
        }
    }

    /**
     * Ends the crawl when the request fails, so the waiting thread is released.
     *
     * @param arg0 failure reported by the HTTP client
     */
    @Override
    public void failed(UnirestException arg0) {
        System.out.println("The request has failed \n" + arg0.getMessage());
        finish();
    }

    /** Ends the crawl when the request is cancelled, so the waiting thread is released. */
    @Override
    public void cancelled() {
        System.out.println("The request has been canceled");
        finish();
    }

    /**
     * Sends the next asynchronous search request. The page size is the smaller
     * of the tweets still pending and {@code MAX_TWEET_COUNT_PER_REQUEST}; the
     * response is delivered to this object's callback methods.
     */
    public void nextRequest() {
        /* Update the fields (not locals) so completed() can detect a short page. */
        request_twetcount = Math.min(pending, MAX_TWEET_COUNT_PER_REQUEST);
        tweets_obtained = 0;

        GetRequest getReq = Unirest.get(
                "https://api.twitter.com/1.1/search/tweets.json?q={query}&count={count}&max_id={max_id}")
                .routeParam("query", screen_name)
                .routeParam("count", String.valueOf(request_twetcount))
                /* max_id is inclusive, so ask strictly below the oldest tweet already held. */
                .routeParam("max_id", String.valueOf(max_id - 1))
                .header("Authorization", "Bearer " + bearer_token);
        System.out.println("Request " + request_twetcount + " tweets to: " + getReq.getUrl());
        getReq.asStringAsync(this);
    }

    /**
     * Starts the crawl and blocks until it has ended (all tweets received, no
     * more results, or a failure). Whether the tweets fit in a single request or
     * not, the first request is issued here; later pages are chained from
     * {@link #completed(HttpResponse)}.
     *
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    public void requestTweets() throws InterruptedException {
        synchronized (wait_inner) {
            synchronized (wait_outer) {
                finish = false;
            }
            pending = tweetcount;
            max_id = Long.MAX_VALUE;
            if (pending <= 0) {
                /* Nothing was asked for: do not issue a request at all. */
                return;
            }
            nextRequest();
            synchronized (wait_outer) {
                /*
                 * Wait on the flag, not on the bare notification: the callback
                 * may already have finished before we get here, and wait() may
                 * wake up spuriously.
                 */
                while (!finish) {
                    wait_outer.wait();
                }
            }
        }
    }

    /**
     * Returns the list holding every tweet received so far.
     *
     * @return the crawler's own list (not a copy)
     */
    public List<Tweet> getTweets() {
        return tweet_list_total;
    }

    /** Marks the crawl as ended and wakes up the thread blocked in {@link #requestTweets()}. */
    public void finish() {
        System.out.println("notify outer");
        synchronized (wait_outer) {
            finish = true;
            wait_outer.notifyAll();
        }
    }

    /** Shuts down the shared Unirest clients. */
    public void ShutDown()
    {
        Unirest.shutDown();
    }

    /**
     * Collects tweets for the query "shib" and writes them to two text files:
     * one with emojis converted to aliases and one with emojis removed.
     *
     * @param args unused
     * @throws IOException if an output file cannot be created or written
     * @throws InterruptedException if interrupted while waiting for the crawl
     */
    public static void main(String args[]) throws IOException, InterruptedException {
        TwitterCrawler tc = new TwitterCrawler(450, "shib");
        try {
            tc.requestTweets();
            System.out.println("llamada terminada");
            synchronized (tc.wait_inner) {
                tc.tweet_list_total.forEach(t -> System.out.println(t.text));
                System.out.println("Guardando Tweets");
                File f1 = new File("CryptoTweets_procesados_con_emojis_procesados.txt");
                File f2 = new File("SHIBDia02.txt");
                /* UTF-8 keeps non-ASCII tweet text intact regardless of the platform charset. */
                try (PrintWriter pr1 = new PrintWriter(f1, "UTF-8");
                        PrintWriter pr2 = new PrintWriter(f2, "UTF-8")) {
                    tc.tweet_list_total.forEach(t ->
                    {
                        String noBreaksTweet = t.text.replace("\n", "").replace("\r", "");
                        String noBreaksParsedEmojis = EmojiParser.parseToAliases(noBreaksTweet);
                        String noBreaksNoEmojisTweet = EmojiParser.removeAllEmojis(noBreaksTweet);
                        pr1.println(t.created_at + " ||| " + t.user.screen_name + " ||| " + noBreaksParsedEmojis);
                        pr2.println(t.created_at + " ||| " + t.user.screen_name + " ||| " + noBreaksNoEmojisTweet);
                    });
                    /* PrintWriter swallows I/O errors; surface them instead of reporting success. */
                    if (pr1.checkError() || pr2.checkError()) {
                        throw new IOException("Error while writing the tweet files");
                    }
                }
                System.out.println("Tweets procesados con emojis guardados en " + f1.getAbsolutePath());
                System.out.println("Tweets procesados sin emojis guardados en " + f2.getAbsolutePath());
            }
        } catch (UnirestException e) {
            e.printStackTrace();
        } finally {
            /* Release the HTTP client threads so the JVM can exit. */
            tc.ShutDown();
        }
    } // Main end
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment