#include <iostream>
#include <stdio.h>
#include <fstream>
#include <string>
#include <string.h>
#include <sstream>
#include <windows.h>
#include <shellapi.h>
#include <dos.h>
#include <dirent.h>
using namespace std;
// Overwrites the first `l` bytes of the buffer `s` with NUL characters and
// returns the same pointer that was passed in. A zero or negative `l` leaves
// the buffer untouched.
char* initializestrings(char* s, int l)
{
    char* cursor = s;
    for (int remaining = l; remaining > 0; --remaining)
    {
        *cursor = '\0';
        ++cursor;
    }
    return s;
}
// Returns `str` without its leading run of space characters.
// Only ' ' is stripped: tabs and other whitespace are kept, and nothing is
// removed from the end. A string made up solely of spaces yields "".
std::string trim_spaces(std::string str)
{
    const std::string::size_type first_kept = str.find_first_not_of(' ');
    if (first_kept == std::string::npos)
    {
        // Either empty or spaces only; both collapse to the empty string.
        return std::string();
    }
    return str.substr(first_kept);
}
// Reports whether the value `x` occurs among the first 15 entries of `a`.
// The array must therefore provide at least 15 readable elements.
bool checkextraques(int a[], int x)
{
    const int* const stop = a + 15;
    for (const int* probe = a; probe != stop; ++probe)
    {
        if (*probe == x)
        {
            return true;
        }
    }
    return false;
}
// Extracts the host part of `url`.
//
// If the first of '/', '?' or '#' in the URL begins a "//" sequence (as in
// "https://host/..." or "//host/..."), the host starts right after it;
// otherwise the host is taken to start at the beginning of the string.
// The host ends at the next '/', '?' or '#', or at the end of the string.
//
// Examples:
//   "https://www.zoo.com/quiz/x" -> "www.zoo.com"
//   "https://www.zoo.com"        -> "www.zoo.com"
//   "www.zoo.com/quiz"           -> "www.zoo.com"
//   ""                           -> ""
//
// User-info ("user@") and port (":8080") components are not stripped.
//
// The previous implementation kept a c_str() pointer across modifications of
// the string and scanned for '/' without a bound, so any URL lacking a slash
// after the host read past the end of the buffer.
std::string find_domain(std::string url)
{
    std::string::size_type host_begin = 0;

    // Only the very first delimiter can be the "//" authority marker; a "//"
    // that shows up later belongs to the path or the query string.
    const std::string::size_type first_delim = url.find_first_of("/?#");
    if (first_delim != std::string::npos && url.compare(first_delim, 2, "//") == 0)
    {
        host_begin = first_delim + 2;
    }

    const std::string::size_type host_end = url.find_first_of("/?#", host_begin);
    if (host_end == std::string::npos)
    {
        return url.substr(host_begin);
    }
    return url.substr(host_begin, host_end - host_begin);
}
// Returns a copy of `x` in which every double quote (") has been replaced by
// a single quote ('). All other characters are left as they are.
std::string remove_double_quotes(std::string x)
{
    for (std::string::size_type pos = 0; pos < x.size(); ++pos)
    {
        if (x[pos] == '"')
        {
            x[pos] = '\'';
        }
    }
    return x;
}
// Reports whether the directory "C:/Users/USER/Desktop" contains an entry
// whose name equals `fileName`, compared case-insensitively.
//
// Returns false when `fileName` is NULL, when the directory cannot be opened,
// or when no entry matches.
//
// The directory handle is closed on every path. This function is polled in a
// loop by main(), so the earlier version - which only closed the handle when
// the file was found and dereferenced a NULL handle when opendir() failed -
// leaked one handle per unsuccessful poll.
bool is_file_exist(const char *fileName)
{
    if (fileName == NULL)
    {
        return false;
    }

    DIR *dir = opendir("C:/Users/USER/Desktop");
    if (dir == NULL)
    {
        return false;
    }

    bool found = false;
    struct dirent *entry;
    while ((entry = readdir(dir)) != NULL)
    {
        if (strcmpi(entry->d_name, fileName) == 0)
        {
            found = true;
            break;
        }
    }

    closedir(dir);
    return found;
}
namespace
{

// Capacity of the fixed-size question/answer buffers used by main().
const int kMaxEntries = 1000;

// Upper bound on the number of questions copied into the CSV export.
const int kMaxQuestions = 15;

// Maximum number of URLs processed in one run.
const int kMaxPages = 5;

// Scraping profile for one supported quiz site.
struct SiteProfile
{
    const char* domain;  // host name, compared against the result of find_domain()
    const char* ques;    // substring marking a line that introduces a question
    const char* ans;     // substring marking a line that introduces an answer
    int quesloop;        // lines to read after a question marker; the last one read is the text
    int ansloop;         // lines to read after an answer marker; the last one read is the text
};

// Supported sites. The 1-based position in this table is the "siteoption"
// number; options 1 and 12 get additional special handling in main().
const SiteProfile kSiteProfiles[] = {
    {"www.playbuzz.com",       "pb-quiz-text-card embed-responsive-item\"", "answer-wrapper\"",               0, 0},
    {"play.howstuffworks.com", "title-tertiary\"",                          "answer answer-default\"",        1, 1},
    {"www.zoo.com",            "title-tertiary\"",                          "answer answer-default\"",        1, 1},
    {"brainfall.com",          "question_title\"",                          "answer noselect\"",              1, 1},
    {"www.funtrivia.com",      "qntxtbox\"",                                "answerSelected(this);\"",        5, 1},
    {"www.proprofs.com",       "question-text\"",                           "opt_text\"",                     1, 2},
    {"www.thequiz.com",        "question-title\"",                          "js-disabled",                    1, 1},
    {"altgalaxy.co",           "wpvqgr-question-label\"",                   "wpvqgr-answer-label\"",          1, 1},
    {"status4everyone.com",    "snax-quiz-question-title\"",                "snax-quiz-answer-label-text\"",  1, 1},
    {"www.magiquiz.com",       "question-title overlay\"",                  "fa fa-check\"",                  1, 3},
    {"www.allthetests.com",    "class=\"questions",                         "class=\"answer\">",              8, 2},
    {"www.women.com",          "\"caption_background\":",                   "\"title\":",                     0, 0},
    {"www.beano.com",          "\"QuizQuestionText-text-",                  "\"QuizAnswer-text-",             2, 1},
};

const int kSiteCount = sizeof(kSiteProfiles) / sizeof(kSiteProfiles[0]);

// Escapes backslashes and single quotes so `text` can be embedded inside a
// single-quoted Python string literal without terminating it.
std::string escape_python_literal(const std::string& text)
{
    std::string escaped;
    for (std::string::size_type i = 0; i < text.size(); ++i)
    {
        if (text[i] == '\\' || text[i] == '\'')
        {
            escaped += '\\';
        }
        escaped += text[i];
    }
    return escaped;
}

// Builds the source of pyparser.py for the given page.
// The script loads `url` with Selenium and writes the page to
// C:/Users/USER/Desktop/htmlparsing.txt: for www.women.com it dumps the
// element with id 'wdc_quiz_data_json' as indented JSON with the line order
// reversed, for every other domain it writes the prettified HTML.
std::string build_parser_script(const std::string& url, const std::string& domain)
{
    std::string script;
    script += "from selenium import webdriver\n";
    script += "from selenium.webdriver.chrome.options import Options\n";
    script += "options=Options()\n";
    script += "url = '" + escape_python_literal(url) + "'\n";
    script += "domain = '" + escape_python_literal(domain) + "'\n";
    script += "options.add_experimental_option( 'prefs',{'profile.managed_default_content_settings.javascript': 2})\n";
    script += "driver = webdriver.Chrome('C:/Users/USER/Desktop/chromedriver.exe', options=options)\n";
    script += "driver.get(url)\n";
    script += "raw_html=driver.page_source\n";
    script += "driver.quit()\n";
    script += "from bs4 import BeautifulSoup as bs\n";
    script += "soup = bs(raw_html,features='html.parser')\n";
    script += "if domain == 'www.women.com':\n";
    script += "    script_content = soup.find(id='wdc_quiz_data_json')\n";
    script += "    str_script_content = str(script_content)\n";
    script += "    str_script_content = str_script_content[49:len(str_script_content) - 9]\n";
    script += "    import json\n";
    script += "    str_script_content = (json.dumps(json.loads(str_script_content), indent=2))\n";
    // Bodies of the `with` statements are indented one level deeper than the
    // statement itself; with a flat indent Python rejects the script.
    script += "    with open('C:/Users/USER/Desktop/to_reverse.txt', 'w', encoding='utf-8') as f:\n";
    script += "        f.write(str_script_content)\n";
    script += "    with open('C:/Users/USER/Desktop/to_reverse.txt') as f, open('C:/Users/USER/Desktop/htmlparsing.txt', 'w') as fout:\n";
    script += "        fout.writelines(reversed(f.readlines()))\n";
    script += "else:\n";
    script += "    prettyHTML = soup.prettify()\n";
    script += "    with open('C:/Users/USER/Desktop/htmlparsing.txt', 'w', encoding='utf-8') as f:\n";
    // NOTE(review): replacing '&' with '&' is a no-op; this may originally
    // have been an entity such as '&amp;' - confirm before changing it.
    script += "        f.write(prettyHTML.replace('&', '&'))";
    return script;
}

// Returns the text between the quote that follows the first `": ` in `line`
// and the next double quote (or the end of the line if there is none).
// Returns an empty string when `line` contains no `": ` sequence.
std::string extract_json_string_value(const std::string& line)
{
    std::string value;
    const std::string::size_type key_end = line.find("\": ");
    if (key_end == std::string::npos)
    {
        return value;
    }
    // key_end + 3 is expected to be the opening quote of the value.
    for (std::string::size_type i = key_end + 4; i < line.size() && line[i] != '"'; ++i)
    {
        value += line[i];
    }
    return value;
}

// Reads `count` further lines from `in`; `line` ends up holding the last one
// read (or stays unchanged when count is 0). A failed read leaves it empty.
void advance_lines(std::istream& in, std::string& line, int count)
{
    for (int n = 1; n <= count; ++n)
    {
        line.clear();
        std::getline(in, line);
    }
}

// Reads at most `max_lines` lines looking for one that contains "<p>"; when
// found, reads one more line into `line` and stops. If no such line turns up,
// `line` holds the last line that was read.
void advance_to_paragraph_text(std::istream& in, std::string& line, int max_lines)
{
    for (int n = 1; n <= max_lines; ++n)
    {
        line.clear();
        std::getline(in, line);
        if (line.find("<p>") != std::string::npos)
        {
            line.clear();
            std::getline(in, line);
            break;
        }
    }
}

// True when `needle` occurs in `line` at a position greater than zero.
// NOTE(review): a match at column 0 is deliberately not counted, mirroring
// the strictly-positive test of the original code - confirm this is intended.
bool contains_after_first_column(const std::string& line, const std::string& needle)
{
    const std::string::size_type pos = line.find(needle);
    return pos != std::string::npos && pos != 0;
}

} // namespace

// Interactive driver.
//
// For up to kMaxPages URLs entered on stdin:
//   1. derives the domain and looks up its SiteProfile (an unsupported domain
//      ends the program with status 0),
//   2. writes pyparser.py and launches it through the shell,
//   3. polls is_file_exist("htmlparsing.txt") until the dump shows up, then
//      scans it line by line and stores every question as  Q"text"  and every
//      answer as  A"text#"  in one shared list,
//   4. deletes pyparser.py and htmlparsing.txt.
// Afterwards it writes the whole list to allquestions.txt and exports up to
// kMaxQuestions questions that are directly followed by answers to
// trivia_quiz.csv, using a quiz title read from stdin.
//
// NOTE(review): htmlparsing.txt is opened relative to the working directory
// while the Python script and is_file_exist() use C:/Users/USER/Desktop, so
// the program presumably has to be started from that folder - confirm.
int main()
{
    std::string tempqa[kMaxEntries];
    int qi = 0;
    bool overflow_reported = false;

    // Appends one entry, refusing to write past the end of tempqa.
    auto store_entry = [&](const std::string& entry)
    {
        if (qi < kMaxEntries)
        {
            tempqa[qi] = entry;
            ++qi;
        }
        else if (!overflow_reported)
        {
            std::cerr << "Too many questions/answers collected; further entries are ignored\n";
            overflow_reported = true;
        }
    };

    int dwiterator = 0;
    char ch = 'N'; // defined value in case the continue-prompt cannot be read
    do
    {
        std::string url;
        std::cout << "Enter your URL\n";
        if (!(std::cin >> url))
        {
            break; // input exhausted: stop collecting and export what we have
        }
        const std::string domain = find_domain(url);

        // Looked up afresh for every URL so that an unsupported domain is
        // never accepted with the profile of the previous page.
        int siteoption = 0;
        for (int i = 0; i < kSiteCount; ++i)
        {
            if (domain == kSiteProfiles[i].domain)
            {
                siteoption = i + 1;
                break;
            }
        }
        if (siteoption == 0)
        {
            std::cout << "Domain: " << domain << " This domain is invalid\n";
            return 0;
        }
        const SiteProfile& site = kSiteProfiles[siteoption - 1];
        const std::string ques = site.ques;
        const std::string ans = site.ans;

        {
            std::ofstream pythonscript("pyparser.py");
            pythonscript << build_parser_script(url, domain);
        } // closed here, before the script is launched

        ShellExecuteA(NULL, "open", "pyparser.py", NULL, NULL, SW_SHOWNORMAL);
        while (!is_file_exist("htmlparsing.txt"))
        {
            Sleep(2000);
        }
        Sleep(3000); // extra delay after the file first appears

        std::ifstream html("htmlparsing.txt");
        std::string line;
        // Looping on getline() itself also terminates when the file could not
        // be opened; testing eof() alone would never end in that case.
        while (std::getline(html, line))
        {
            if (contains_after_first_column(line, ques))
            {
                if (siteoption == 1)
                {
                    advance_to_paragraph_text(html, line, 15);
                }
                if (siteoption == 12)
                {
                    // The question text is taken from the line after the marker.
                    line.clear();
                    std::getline(html, line);
                    line = extract_json_string_value(line);
                }
                else
                {
                    advance_lines(html, line, site.quesloop);
                }
                std::string x = trim_spaces(line.c_str());
                x = remove_double_quotes(x);
                x.insert(0, "Q\"");
                x.append("\"");
                store_entry(x);
                continue;
            }

            if (!contains_after_first_column(line, ans))
            {
                continue;
            }
            if (siteoption == 1)
            {
                advance_to_paragraph_text(html, line, 20);
            }
            if (siteoption == 12)
            {
                // A "title" line only counts as an answer when the following
                // line carries a "result_weight" key.
                std::string next_line;
                std::getline(html, next_line);
                if (!contains_after_first_column(next_line, "\"result_weight\":"))
                {
                    continue;
                }
                line = extract_json_string_value(line);
            }
            else
            {
                advance_lines(html, line, site.ansloop);
            }
            std::string x = trim_spaces(line.c_str());
            x = remove_double_quotes(x);
            x.insert(0, "A\"");
            x.append("#\"");
            store_entry(x);
        }
        html.close(); // must be closed before the file is deleted
        remove("pyparser.py");
        remove("htmlparsing.txt");

        ++dwiterator;
        if (dwiterator < kMaxPages)
        {
            std::cout << "Do you want to continue? (Press 'Y' to continue and 'N' to exit)\n";
            std::cin >> ch;
        }
    }
    while (toupper(static_cast<unsigned char>(ch)) != 'N' && dwiterator < kMaxPages);

    // Where two questions are adjacent, drop the first one by shifting the
    // rest of the array left. qi is left unchanged, so each removal leaves an
    // empty entry at the end of the used range (written as a blank line to
    // allquestions.txt below).
    for (int i = 0; i < kMaxEntries; ++i)
    {
        const char current = tempqa[i].c_str()[0];
        const char following = (i != kMaxEntries - 1) ? tempqa[i + 1].c_str()[0] : '\0';
        if (current == 'Q' && following == 'Q')
        {
            for (int j = i; j < kMaxEntries - 1; ++j)
            {
                tempqa[j] = tempqa[j + 1];
            }
        }
    }

    std::ofstream allquestions;
    allquestions.open("allquestions.txt");
    for (int i = 0; i < qi; ++i)
    {
        if (tempqa[i].c_str()[0] == 'Q')
        {
            allquestions << "Index of Question " << i << " " << tempqa[i] << "\n";
        }
        else
        {
            allquestions << tempqa[i] << "\n";
        }
    }
    allquestions.close();

    // Collect up to kMaxQuestions questions that are immediately followed by
    // at least one answer, together with their run of answers.
    std::string mac[kMaxEntries];
    int mac_count = 0;
    int k = 0;
    for (int i = 0; i < qi - 1; ++i)
    {
        if (tempqa[i].c_str()[0] == 'Q' && k < kMaxQuestions && tempqa[i + 1].c_str()[0] == 'A')
        {
            mac[mac_count] = tempqa[i];
            ++mac_count;
            for (int j = i + 1; j < qi && tempqa[j].c_str()[0] == 'A'; ++j)
            {
                mac[mac_count] = tempqa[j];
                ++mac_count;
            }
            ++k;
        }
    }

    std::cin.ignore(); // drop the newline left behind by the last >> read
    std::ofstream csv;
    csv.open("trivia_quiz.csv");

    std::string quiz_title;
    std::cout << "Enter the title of this quiz\n";
    std::getline(std::cin, quiz_title);
    quiz_title = remove_double_quotes(quiz_title);
    quiz_title.insert(0, "\"");
    quiz_title.append("\"");

    // Header row: three fixed columns followed by answer_1 .. answer_20.
    csv << "quiz_title" << "," << "Type" << "," << "question" << ",";
    for (int n = 1; n <= 20; ++n)
    {
        csv << "answer_" << n << ",";
    }
    csv << "\n";

    // One row per question; only the first row carries the title and type.
    int qt = 0;
    for (int row = 1; row <= k; ++row)
    {
        if (row == 1)
        {
            csv << quiz_title << "," << "trivia" << ",";
        }
        else
        {
            csv << ",,";
        }
        // substr(1, ...) drops the leading 'Q' / 'A' tag character.
        csv << mac[qt].substr(1, mac[qt].size() - 1) << ",";
        int j;
        for (j = qt + 1; j < mac_count && mac[j].c_str()[0] == 'A'; ++j)
        {
            csv << mac[j].substr(1, mac[j].size() - 1) << ",";
        }
        qt = j;
        csv << "\n";
    }
    csv.close();
    return 0;
}
thanks agencia de marketing digital en monterreylistas m3ulistas iptvver futbol onlinerosadin tv
ReplyDelete