#!/bin/bash
# megaupbash version 0.3
# Megaupload download manager
# Bash script
#
# Written by _Poseidon_, started on Tuesday 26 August 2008, based on rapibash
# Last modified Tuesday 14 April 2009
# Webpage http://pablo777.wordpress.com/

#######################################
# Entry point: dispatch on the first command-line argument.
# With no arguments, open the first available GUI (zenity, then Xdialog).
# Arguments: [start|stop|clean|move FROM TO|add URI...|z|x]
#######################################
function main {
  settings
  if [ $# -eq 0 ]; then
    if command -v zenity >/dev/null; then
      zenity-gui
    elif command -v Xdialog >/dev/null; then
      Xdialog-gui
    fi
  else
    case "$1" in
      start)
        # Refuse to start when another "<script> start" is already listed.
        PROCESS="$(ps ax -o pid,args)"
        if [ "$(printf '%s\n' "$PROCESS" | grep -cF -- "$0 start")" -gt 1 ]; then
          echo "$ERR5"
        else
          start_download
        fi
        ;;
      stop) stop_download ;;
      clean) clean_list ;;
      move) move_URI "$2" "$3" ;;
      # "add" itself is passed along too; add_URIs only keeps megaupload URLs.
      add) add_URIs "$@" ;;
      z) zenity-gui ;;
      x) Xdialog-gui ;;
    esac
  fi
}

#######################################
# Remove every line equal to $1 from $LIST_FILE.
# The match is literal (grep -F), so "." or "?" in a URI cannot delete
# other entries. An empty $1 removes blank lines.
# Arguments: $1 - URI to remove
#######################################
function remove_URI_from_list {
  local remaining
  remaining="$(grep -Fvx -- "$1" "$LIST_FILE")"
  if [ -n "$remaining" ]; then
    printf '%s\n' "$remaining" >"$LIST_FILE"
  else
    printf '' >"$LIST_FILE"
  fi
}

#######################################
# Load the non-empty lines of $LIST_FILE into the LIST_ITEMS array,
# one element per line (no word splitting, no glob expansion).
#######################################
function read_download_list {
  local uri
  LIST_ITEMS=()
  while IFS= read -r uri || [ -n "$uri" ]; do
    if [ -n "$uri" ]; then
      LIST_ITEMS+=("$uri")
    fi
  done <"$LIST_FILE"
}

#######################################
# Download every URI in $LIST_FILE, first to last. An entry is removed once
# it has been downloaded or the server reports it as unavailable; any other
# failure leaves it in place so the next loop iteration retries it.
# Progress messages go to stdout (the GUIs redirect it to $LOG_FILE).
# Returns: 1 when the state files or download directory are not writable.
#######################################
function start_download {
  settings
  if [ ! -w "$LIST_FILE" ] || [ ! -w "$LOG_FILE" ] ||
    [ ! -w "$DOWNLOAD_STATUS_FILE" ] || [ ! -w "$DOWNLOAD_DIR" ]; then
    echo "$ERR6"
    return 1
  fi

  local captcha_image
  local retry_delay=5

  while [ -s "$LIST_FILE" ]; do
    each_URI="$(head -n 1 "$LIST_FILE")"
    if [ -z "$each_URI" ]; then
      # A blank first line would otherwise be retried forever.
      remove_URI_from_list ""
      continue
    fi

    echo "$each_URI" >"$DOWNLOAD_STATUS_FILE"
    echo "$MSG1 $each_URI"

    if ! HTML_FILE1="$(wget -q -O - "$each_URI")"; then
      # Report the failure and pause instead of hammering the server.
      echo "$ERR0 $each_URI $MSG6"
      sleep "$retry_delay"
      continue
    fi

    if printf '%s\n' "$HTML_FILE1" |
      grep -q "Unfortunately, the link you have clicked is not available."; then
      echo "$ERR2"
      remove_URI_from_list "$each_URI"
      continue
    fi

    # Scrape the hidden form fields and the captcha image address.
    CAPTCHACODE="$(printf '%s\n' "$HTML_FILE1" | grep 'name="captchacode"' |
      sed -e 's/^.*value="//' -e 's/".*$//')"
    MEGAVAR="$(printf '%s\n' "$HTML_FILE1" | grep 'name="megavar"' |
      sed -e 's/^.*value="//' -e 's/".*$//')"
    GENCAP="$(printf '%s\n' "$HTML_FILE1" | grep "gencap" |
      sed -e 's/^.*src="//' -e 's/".*$//')"

    # Unpredictable temporary name instead of a fixed /tmp/captcha.gif.
    if ! captcha_image="$(mktemp "${TMPDIR:-/tmp}/megaupbash-captcha.XXXXXX")"; then
      echo "$ERR6"
      return 1
    fi
    wget -q -O "$captcha_image" "$GENCAP"
    CAPTCHA="$(python "/tmp/megaupload_captcha.py" "$captcha_image")"
    rm -f -- "$captcha_image"

    if [ "${#CAPTCHA}" -ne 4 ]; then
      echo "$ERR7"
      continue
    fi

    if ! HTML_FILE2="$(wget -q --post-data "captchacode=$CAPTCHACODE&megavar=$MEGAVAR&captcha=$CAPTCHA" "$each_URI" -O -)"; then
      echo "$ERR1 $HTML_FILE2"
      continue
    fi

    DOWNLOADLINK="$(printf '%s\n' "$HTML_FILE2" | grep 'id="downloadlink"' |
      sed -e 's/^.*href="//' -e 's/".*$//')"
    # Replace numeric character references (odd characters) with "_".
    DOWNLOADLINK="$(printf '%s\n' "$DOWNLOADLINK" | sed -e 's/\&\#[0-9]*\;/_/g')"

    if [ -z "$DOWNLOADLINK" ]; then
      echo "$ERR8"
      continue
    fi

    # The former 46-second wait ($MSG3) is no longer necessary.
    echo "$MSG4 $each_URI"
    if ! cd "$DOWNLOAD_DIR"; then
      echo "$ERR6"
      return 1
    fi
    if wget -a "$DOWNLOAD_STATUS_FILE" "$DOWNLOADLINK"; then
      echo "$MSG5"
      remove_URI_from_list "$each_URI"
    else
      echo "$ERR1 $each_URI $MSG6"
    fi
  done
}

#######################################
# Kill the running "<script> start" process and its megaupload wget child.
# Relies on set_status to fill $PROCESS and $DOWNLOADING.
#######################################
function stop_download {
  local pids
  set_status
  if [ "$DOWNLOADING" = "yes" ]; then
    pids="$(printf '%s\n' "$PROCESS" | grep -F -- "$0 start" | awk '{print $1}')"
    if [ -n "$pids" ]; then
      # Intentionally unquoted: one argument per PID.
      kill -9 $pids
    fi
    pids="$(printf '%s\n' "$PROCESS" |
      grep -E "wget.*http://.+\.megaupload\.com/files/" | awk '{print $1}')"
    if [ -n "$pids" ]; then
      kill -9 $pids
    fi
  fi
}

#######################################
# Empty the download list.
# Returns: 0 on success, non-zero if $LIST_FILE cannot be written.
#######################################
function clean_list {
  printf '' >"$LIST_FILE"
}

#######################################
# Extract megaupload URIs from arbitrary text and append them to $LIST_FILE.
# The resulting list keeps first-seen order, contains no duplicates (not just
# adjacent ones) and never contains blank lines.
# Arguments: any number of strings; anything that is not a URI is ignored.
#######################################
function add_URIs {
  local merged
  merged="$(
    {
      # awk re-terminates a last line that lacks its newline.
      if [ -f "$LIST_FILE" ]; then
        awk '{ print }' "$LIST_FILE"
      fi
      printf '%s\n' "$@" |
        tr -sc '[:alnum:]:/=?.' '\n' |
        grep -E 'http://(www\.)?megaupload\.com/'
    } | awk 'NF && !seen[$0]++'
  )"
  if [ -n "$merged" ]; then
    printf '%s\n' "$merged" >"$LIST_FILE"
  else
    printf '' >"$LIST_FILE"
  fi
}

#######################################
# Move line FROM of $LIST_FILE so that it ends up right after line TO
# (both numbered as in the list before the move; TO=0 means "to the top").
# Arguments: $1 - FROM (1..N), $2 - TO (0..N)
# Returns: 0 on success or when FROM equals TO, 1 on invalid positions.
#######################################
function move_URI {
  local from="$1"
  local to="$2"
  local total new_list

  if [ "$from" = "$to" ]; then
    return 0
  fi
  case "$from" in
    '' | *[!0-9]*) return 1 ;;
  esac
  case "$to" in
    '' | *[!0-9]*) return 1 ;;
  esac

  total="$(awk 'END { print NR }' "$LIST_FILE")" || return 1
  if [ "$from" -lt 1 ] || [ "$from" -gt "$total" ] || [ "$to" -gt "$total" ]; then
    return 1
  fi

  # Work on line numbers, not on line contents, so look-alike entries
  # are never dropped.
  new_list="$(awk -v from="$from" -v to="$to" '
    { line[NR] = $0 }
    END {
      if (to == 0) print line[from]
      for (i = 1; i <= NR; i++) {
        if (i != from) print line[i]
        if (i == to) print line[from]
      }
    }' "$LIST_FILE")"
  printf '%s\n' "$new_list" >"$LIST_FILE" && return 0
}

#######################################
# zenity front end: show the main menu until the dialog is cancelled.
# Uses set_status, settings and change, defined elsewhere in this file.
#######################################
function zenity-gui {
  local status_text progress progress_kb uri count
  local -a dialog_args selected

  while true; do
    set_status
    if [ "$DOWNLOADING" = "yes" ]; then
      startorstop="stop"
      startorstoplabel="$ZG16"
    else
      startorstop="start"
      startorstoplabel="$ZG15"
    fi

    if [ "$WGET_RUNNING" = "yes" ]; then
      # Show progress in megabytes once it exceeds 1024K.
      progress="$DOWNLOAD_PROGRESS"
      progress_kb="${DOWNLOAD_PROGRESS//K/}"
      case "$progress_kb" in
        '' | *[!0-9]*) ;; # not a plain number: show it untouched
        *)
          if [ "$progress_kb" -gt 1024 ]; then
            progress="$((10#$progress_kb / 1024))M"
          fi
          ;;
      esac
      status_text="$(printf '%s %s/%s (%s)\n%s %s %s %s' \
        "$DOING" "$progress" "$DOWNLOAD_SIZE_H" "$DOWNLOAD_PERCENTAGE" \
        "$ZG19" "$DOWNLOAD_TIME_LEFT" "$ZG20" "$DOWNLOAD_SPEED")"
    else
      status_text="$DOING"
    fi

    OPTION="$(zenity --list --hide-column=1 --column "" --column "" \
      --text="$status_text" --title="$ZG1" --width 600 --height 410 \
      actualizar "$ZG2" \
      "$startorstop" "$startorstoplabel" \
      view_list "$ZG3" \
      add_uri "$ZG4" \
      delete_uri "$ZG5" \
      move_uri "$ZG6" \
      clean_list "$ZG7" \
      add_webpage "$ZG8" \
      add_file "$ZG9" \
      configure "$ZG22" \
      view_log "$ZG21")" || break

    case "$OPTION" in
      view_list)
        read_download_list
        zenity --list --column "" --title "$ZG3" --text="$DOING" \
          --width 600 --height 410 "${LIST_ITEMS[@]}"
        ;;
      add_uri)
        if URIS="$(zenity --entry --text="$ZG11" --title="$ZG4" --width 600)"; then
          add_URIs "$URIS"
        fi
        ;;
      delete_uri)
        read_download_list
        dialog_args=()
        for uri in "${LIST_ITEMS[@]}"; do
          dialog_args+=(false "$uri")
        done
        if DELETE="$(zenity --list --column "" --column "$ZG5" --checklist \
          --separator " " --title "$ZG5" --text="$ZG5" \
          --width 600 --height 410 "${dialog_args[@]}")"; then
          read -r -a selected <<<"$DELETE"
          for uri in "${selected[@]}"; do
            remove_URI_from_list "$uri"
          done
        fi
        ;;
      clean_list) clean_list ;;
      move_uri)
        read_download_list
        dialog_args=()
        count=0
        for uri in "${LIST_ITEMS[@]}"; do
          count=$((count + 1))
          dialog_args+=("$count" "$uri")
        done
        if FROM="$(zenity --list --column "" --column "" --title "$ZG6" \
          --width 600 --height 410 --text "$ZG13" "${dialog_args[@]}")" &&
          TO="$(zenity --list --column "" --column "" --title "$ZG6" \
            --width 600 --height 410 --text "$ZG14" \
            0 "$ZG12" "${dialog_args[@]}")"; then
          move_URI "$FROM" "$TO"
        fi
        ;;
      stop) stop_download ;;
      start)
        nohup "$0" "start" >>"$LOG_FILE" &
        # Give the background job time to show up in the next status refresh.
        sleep 1
        ;;
      add_file)
        # Open dialog (not a save dialog): the chosen file is only read.
        if A="$(zenity --file-selection --title="$ZG9")"; then
          add_URIs "$(cat -- "$A")"
        fi
        ;;
      add_webpage)
        if URI="$(zenity --entry --text="$ZG8" --title="$ZG8" --width 600)" &&
          WEB_TEXT="$(wget -O - "$URI")"; then
          add_URIs "$WEB_TEXT"
        fi
        ;;
      view_log)
        zenity --text="$ZG21" --title="$ZG21" --width 600 --height 410 \
          --text-info <"$LOG_FILE"
        ;;
      configure)
        settings
        while OPTION2="$(zenity --title "$ZG22" --hide-column=1 --column "" \
          --column "" --text "$ZG22" --width 600 --height 410 --list \
          download_dir "$ZG24 [$DOWNLOAD_DIR]" \
          list_file "$ZG25 [$LIST_FILE]" \
          download_status_file "$ZG26 [$DOWNLOAD_STATUS_FILE]" \
          log_file "$ZG27 [$LOG_FILE]" \
          permissions "$ZG28 [$PERMISSIONS]")"; do
          case "$OPTION2" in
            download_dir)
              if D="$(zenity --title "$ZG24" --file-selection --directory)"; then
                change DOWNLOAD_DIR "$D"
              fi
              ;;
            list_file)
              if F="$(zenity --title "$ZG25" --file-selection --save)"; then
                change LIST_FILE "$F"
              fi
              ;;
            download_status_file)
              if F="$(zenity --title "$ZG26" --file-selection --save)"; then
                change DOWNLOAD_STATUS_FILE "$F"
              fi
              ;;
            log_file)
              if F="$(zenity --title "$ZG27" --file-selection --save)"; then
                change LOG_FILE "$F"
              fi
              ;;
            permissions)
              if P="$(zenity --title "$ZG28" --text "$ZG29" --entry)"; then
                change PERMISSIONS "$P"
              fi
              ;;
          esac
          # Reload so the menu shows the new values.
          settings
        done
        ;;
    esac
  done
}

### Comment
### Important note: a KDialog GUI was planned as well, but the author found
### KDialog too unpleasant to work with, so it was never written.

#######################################
# Xdialog front end: show the main menu until the dialog is cancelled.
# Uses set_status, settings, change and the list functions defined
# elsewhere in this file.
#######################################
function Xdialog-gui {
  local menu_text uri count
  local -a dialog_args selected

  while true; do
    set_status
    if [ "$DOWNLOADING" = "yes" ]; then
      startorstop="stop"
      startorstoplabel="$ZG16"
    else
      startorstop="start"
      startorstoplabel="$ZG15"
    fi

    if [ "$WGET_RUNNING" = "yes" ]; then
      menu_text="$(printf '%s %s/%s (%s)\n%s %s %s %s' \
        "$DOING" "$DOWNLOAD_PROGRESS" "$DOWNLOAD_SIZE_H" "$DOWNLOAD_PERCENTAGE" \
        "$ZG19" "$DOWNLOAD_TIME_LEFT" "$ZG20" "$DOWNLOAD_SPEED")"
    else
      menu_text="$DOING"
    fi

    OPTION="$(Xdialog --stdout --title="$ZG0" --cancel-label "$ZG23" --no-tags \
      --menubox "$menu_text" 20 55 0 \
      actualizar "$ZG2" \
      "$startorstop" "$startorstoplabel" \
      view_list "$ZG3" \
      add_uri "$ZG4" \
      delete_uri "$ZG5" \
      move_uri "$ZG6" \
      clean_list "$ZG7" \
      add_webpage "$ZG8" \
      add_file "$ZG9" \
      configure "$ZG22" \
      view_log "$ZG21")" || break

    case "$OPTION" in
      view_list)
        Xdialog --title "$ZG3" --no-cancel --textbox "$LIST_FILE" 20 55
        ;;
      add_uri)
        if URIS="$(Xdialog --stdout --title="$ZG4" --inputbox "$ZG11" 0 0)"; then
          add_URIs "$URIS"
        fi
        ;;
      delete_uri)
        # One "tag label state" triplet per list entry; reading line by line
        # avoids word splitting and glob expansion of "?" in the URIs.
        dialog_args=()
        while IFS= read -r uri || [ -n "$uri" ]; do
          if [ -n "$uri" ]; then
            dialog_args+=("$uri" "$uri" off)
          fi
        done <"$LIST_FILE"
        if DELETE="$(Xdialog --stdout --separator " " --no-tags --title "$ZG5" \
          --checklist "$ZG5" 20 55 0 "${dialog_args[@]}")"; then
          read -r -a selected <<<"$DELETE"
          NEW_LIST="$(cat -- "$LIST_FILE")"
          for uri in "${selected[@]}"; do
            # Literal whole-line match: "." and "?" are not regex operators here.
            NEW_LIST="$(printf '%s\n' "$NEW_LIST" | grep -Fvx -- "$uri")"
            if [ -n "$NEW_LIST" ]; then
              printf '%s\n' "$NEW_LIST" >"$LIST_FILE"
            else
              printf '' >"$LIST_FILE"
            fi
          done
        fi
        ;;
      clean_list) clean_list ;;
      move_uri)
        dialog_args=()
        count=0
        while IFS= read -r uri || [ -n "$uri" ]; do
          if [ -n "$uri" ]; then
            count=$((count + 1))
            dialog_args+=("$count" "$uri")
          fi
        done <"$LIST_FILE"
        if FROM="$(Xdialog --stdout --title "$ZG6" --menubox "$ZG13" 20 55 0 \
          "${dialog_args[@]}")" &&
          TO="$(Xdialog --stdout --title "$ZG6" --menubox "$ZG14" 20 55 0 \
            0 "$ZG12" "${dialog_args[@]}")"; then
          move_URI "$FROM" "$TO"
        fi
        ;;
      stop) stop_download ;;
      start)
        nohup "$0" "start" >>"$LOG_FILE" &
        # Give the background job time to show up in the next status refresh.
        sleep 1
        ;;
      add_file)
        if A="$(Xdialog --stdout --title="$ZG9" --fselect "*" 0 0)"; then
          add_URIs "$(cat -- "$A")"
        fi
        ;;
      add_webpage)
        if URI="$(Xdialog --stdout --title="$ZG8" --inputbox "$ZG8" 0 0)" &&
          WEB_TEXT="$(wget -O - "$URI")"; then
          add_URIs "$WEB_TEXT"
        fi
        ;;
      view_log)
        Xdialog --title "$ZG21" --no-cancel --textbox "$LOG_FILE" 20 55
        ;;
      configure)
        settings
        while OPTION2="$(Xdialog --stdout --title "$ZG22" --no-tags \
          --menubox "$ZG22" 20 55 0 \
          download_dir "$ZG24 [$DOWNLOAD_DIR]" \
          list_file "$ZG25 [$LIST_FILE]" \
          download_status_file "$ZG26 [$DOWNLOAD_STATUS_FILE]" \
          log_file "$ZG27 [$LOG_FILE]" \
          permissions "$ZG28 [$PERMISSIONS]")"; do
          case "$OPTION2" in
            download_dir)
              if D="$(Xdialog --stdout --title "$ZG24" --dselect "*" 20 55)"; then
                change DOWNLOAD_DIR "$D"
              fi
              ;;
            list_file)
              if F="$(Xdialog --stdout --title "$ZG25" --fselect "*" 20 55)"; then
                change LIST_FILE "$F"
              fi
              ;;
            download_status_file)
              if F="$(Xdialog --stdout --title "$ZG26" --fselect "*" 20 55)"; then
                change DOWNLOAD_STATUS_FILE "$F"
              fi
              ;;
            log_file)
              if F="$(Xdialog --stdout --title "$ZG27" --fselect "*" 20 55)"; then
                change LOG_FILE "$F"
              fi
              ;;
            permissions)
              if P="$(Xdialog --stdout --title "$ZG28" --inputbox "$ZG29" 0 0)"; then
                change PERMISSIONS "$P"
              fi
              ;;
          esac
          # Reload so the menu shows the new values.
          settings
        done
        ;;
    esac
  done
}

#######################################
# Persist VARIABLE=VALUE in the per-user configuration file, replacing any
# previous assignment of the same variable.
# The value is written shell-quoted (printf %q) because the file is later
# sourced by settings: paths with spaces stay intact and shell
# metacharacters in the value are not executed.
# Arguments: $1 - variable name, $2 - new value
#######################################
function change {
  variable="$1"
  value="$2"
  local kept
  # Anchored, so only assignments of exactly this variable are dropped.
  kept="$(grep -v -- "^${variable}=" "$CONF_FILE_USER")"
  {
    if [ -n "$kept" ]; then
      printf '%s\n' "$kept"
    fi
    printf '%s=%q\n' "$variable" "$value"
  } >"$CONF_FILE_USER"
}

#######################################
# Refresh the global status variables from the process table, the last line
# of $LOG_FILE and the wget log kept in $DOWNLOAD_STATUS_FILE.
# Sets: PROCESS, DOWNLOADING, WGET_RUNNING, DOING, DOWNLOAD_URI,
#       DOWNLOAD_SIZE, DOWNLOAD_SIZE_H, DOWNLOAD_PERCENTAGE,
#       DOWNLOAD_PROGRESS, DOWNLOAD_SPEED, DOWNLOAD_TIME_LEFT
#######################################
function set_status {
  local size_line progress_line

  PROCESS="$(ps ax -o pid,args)"
  DOWNLOADING="no"
  WGET_RUNNING="no"
  DOWNLOAD_SIZE=""
  DOWNLOAD_SIZE_H=""
  DOWNLOAD_PERCENTAGE=""
  DOWNLOAD_PROGRESS=""
  DOWNLOAD_SPEED=""
  DOWNLOAD_TIME_LEFT=""
  DOWNLOAD_URI=""
  DOING="$ZG18"

  if printf '%s\n' "$PROCESS" | grep -qF -- "$0 start"; then
    DOWNLOADING="yes"
    DOWNLOAD_URI="$(head -n 1 "$DOWNLOAD_STATUS_FILE")"
    DOING="$(tail -n 1 "$LOG_FILE")"
    if printf '%s\n' "$DOING" | grep -qF -- "$MSG4"; then
      WGET_RUNNING="yes"
      # First log line shaped like "<word>: <bytes> (<human size>) [<type>]".
      # [A-Za-z] replaces the former [A-Za-Z], an invalid range in the C locale.
      size_line="$(grep -E '[A-Za-z]*: [0-9]* \(.*\) \[.*\]' "$DOWNLOAD_STATUS_FILE" |
        head -n 1)"
      DOWNLOAD_SIZE="$(printf '%s\n' "$size_line" |
        sed -e 's/^.*: //' -e 's/ .*$//')"
      DOWNLOAD_SIZE_H="$(printf '%s\n' "$size_line" |
        sed -e 's/^.*(//' -e 's/).*$//')"
      # The last-but-one line of the status file is taken as the progress row.
      progress_line="$(tail -n 2 "$DOWNLOAD_STATUS_FILE" | head -n 1)"
      DOWNLOAD_PERCENTAGE="$(printf '%s\n' "$progress_line" | awk '{print $7}')"
      DOWNLOAD_PROGRESS="$(printf '%s\n' "$progress_line" | awk '{print $1}')"
      DOWNLOAD_SPEED="$(printf '%s\n' "$progress_line" | awk '{print $8}')"
      DOWNLOAD_TIME_LEFT="$(printf '%s\n' "$progress_line" | awk '{print $9}')"
    fi
  fi
}

#######################################
# Set defaults, load the global and per-user configuration files, create
# missing state files with the configured permissions and select the message
# catalogue from $LANG (Spanish for es*, English otherwise).
#######################################
function settings {
  local state_file

  # Not referenced anywhere else in this file; kept in case a sourced
  # configuration relies on it.
  ASCII_TABLE=' !"#$/& ()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^-`abcdefghijklmnopqrstuvwxyz'
  DOWNLOAD_DIR="$HOME"
  LIST_FILE="$HOME/.megaupbash-list"
  DOWNLOAD_STATUS_FILE="$HOME/.megaupbash.download.status"
  LOG_FILE="$HOME/.megaupbash.log"
  PERMISSIONS="600"
  CONF_FILE_GLOBAL="/etc/megaupbash.conf"
  CONF_FILE_USER="$HOME/.megaupbash.conf"

  # User settings are sourced last so they override the global ones.
  if [ -r "$CONF_FILE_GLOBAL" ]; then
    . "$CONF_FILE_GLOBAL"
  fi
  if [ -r "$CONF_FILE_USER" ]; then
    . "$CONF_FILE_USER"
  fi

  for state_file in "$LIST_FILE" "$LOG_FILE" "$DOWNLOAD_STATUS_FILE" "$CONF_FILE_USER"; do
    if [ ! -e "$state_file" ]; then
      printf '' >"$state_file"
    fi
  done
  for state_file in "$LIST_FILE" "$LOG_FILE" "$DOWNLOAD_STATUS_FILE" "$CONF_FILE_USER"; do
    chmod "$PERMISSIONS" "$state_file"
  done

  case "$LANG" in
    es*)
      MSG1="Buscando enlace de descarga para"
      MSG3="Esperando que el servidor autorize la descarga"
      MSG4="Descargando"
      MSG5="Descarga completa"
      MSG6="Reintentando"
      MSG11="Lista de descargas borrada"
      ERR0="Error: No se puede determinar el enlace de descarga para"
      ERR1="Error al descargar"
      ERR2="Archivo borrado"
      ERR3="Limite de descargas excedido, esperando 4 minutos y reintentando"
      ERR4="Error: no se puede determinar cuanto tiempo esperar, esperando 300 segundos"
      ERR5="Error: Este programa ya está ejecutándose"
      ERR6="Error: Permisos insuficientes"
      ERR7="Error: no se pudo determinar el codigo captcha"
      ERR8="Error al resolver el codigo javascript"
      ZG0="MegaupBash"
      ZG1="MegaupBash GUI basado en zenity"
      ZG2="Actualizar información sobre el estado del programa"
      ZG3="Ver lista de descargas"
      ZG4="Agregar URI a la lista de descargas"
      ZG5="Borrar URI de la lista de descargas"
      ZG6="Reordenar lista de descargas"
      ZG7="Borrar lista de descargas"
      ZG8="Encontrar enlaces en página web"
      ZG9="Encontrar enlaces en archivo"
      ZG10="Detener las descargas"
      ZG11="Agregar URI a la lista de descargas. Se pueden agregar varias separadas por un espacio."
      ZG12="Mover al principio de la lista"
      ZG13="Mover la descarga"
      ZG14="Despues de"
      ZG15="Iniciar descargas"
      ZG16="Detener descargas"
      ZG17="Estado actual:"
      ZG18="Detenido"
      ZG19="Tiempo restante estimado:"
      ZG20="Velocidad de descarga:"
      ZG21="Ver log"
      ZG22="Modificar configuración"
      ZG23="Cerrar ventana"
      ZG24="Directorio de descargas"
      ZG25="Archivo de lista"
      ZG26="Archivo de estado de descarga"
      ZG27="Archivo de log"
      ZG28="Permisos para archivos"
      ZG29="Permisos para archivos. Poner este valor a 666 para permitir a otros usuarios modificar la lista. Ponerlo a 600 para denegarselo."
      ;;
    *)
      MSG1="Looking for the download link for"
      MSG3="Waiting for the server allow downloading"
      MSG4="Downloading"
      MSG5="Download complete"
      MSG6="Retrying"
      MSG11="Download list cleared"
      ERR0="Error: Can't find the download link for"
      ERR1="Error while downloading"
      ERR2="File has been deleted"
      ERR3="Download limit exceded, waiting 4 minutes and retrying"
      ERR4="Error: can't be determinated the wait time, waiting 300 seconds"
      ERR5="Error: this program is already running"
      ERR6="Error: Permission denied"
      ERR7="Error: Can't determine the captcha code"
      ERR8="Error resolving the javascript code"
      ZG0="MegaupBash"
      ZG1="MegaupBash zenity based GUI"
      ZG2="Refresh download status info"
      ZG3="View download list"
      ZG4="Add URI to the download list"
      ZG5="Delete URI from the download list"
      ZG6="Move an element in the download list"
      ZG7="Delete the download list"
      ZG8="Find links in webpage"
      ZG9="Find links in file"
      ZG10="Stop downloads"
      ZG11="Add URI to the download list. Multiple allowed"
      ZG12="Move to the begin of the list"
      ZG13="Move the download"
      ZG14="Under"
      ZG15="Start downloads"
      ZG16="Stop downloads"
      ZG17="Status:"
      ZG18="Stopped"
      ZG19="Estimated time left:"
      ZG20="Download speed:"
      ZG21="View log"
      ZG22="Settings"
      ZG23="Close window"
      ZG24="Download directory"
      ZG25="List file"
      ZG26="Download status file"
      ZG27="Log file"
      ZG28="File permissions"
      ZG29="File permissions. Set this value to 666 will allow to another users to modify de download list. Set it to 600 to deny them."
      ;;
  esac
}

# Unpack the captcha solver embedded at the end of this file: everything from
# the line that is exactly "#!/usr/bin/python" to the end of the file.
# A marker is used instead of a fixed "tail -n 375" so that editing either
# part of the file cannot silently truncate or corrupt the extracted program.
# NOTE: the fixed /tmp path is kept because start_download invokes it there.
sed -n '/^#!\/usr\/bin\/python$/,$p' "$0" >"/tmp/megaupload_captcha.py"
main "$@"
exit

##########################################################
### Everything below this line is code taken from plowshare
### http://code.google.com/p/plowshare/wiki/BreakingMegauploadCaptcha
### Thanks to the plowshare developers
##########################################################
#!/usr/bin/python
#
# This file is part of Plowshare.
#
# Plowshare is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Plowshare is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Plowshare.  If not, see <http://www.gnu.org/licenses/>.
#
# Author: Arnau Sanchez
"""
Decode the captcha used by Megaupload (2009/03/20):

4 bold characters (letter-letter-letter-digit), rotated and overlapped.

Dependencies: Tesseract-ocr, Python Imaging Library

NOTE(review): this is Python 2 code (izip, iteritems, tuple parameters,
StringIO module); it will not run unmodified under Python 3.
"""
import os
import sys
import logging
import string
import tempfile
import operator
import subprocess
from itertools import tee, izip, groupby
from StringIO import StringIO

# Third-party modules
import PIL.Image as Image

# Verbose levels: number of -v flags -> logging level
_VERBOSE_LEVELS = {
    0: logging.CRITICAL,
    1: logging.ERROR,
    2: logging.WARNING,
    3: logging.INFO,
    4: logging.DEBUG,
}

# Generic functions

def replace_chars(s, table):
    """Replace in s every key of dictionary table with its value."""
    for key, value in table.iteritems():
        s = s.replace(key, value)
    return s

def center_of_mass(coords):
    """Return center of mass of coordinates (per-axis mean, as a list).

    NOTE(review): with integer coordinates under Python 2 the division
    truncates -- confirm this is intended before porting.
    """
    return [sum(lst)/len(lst) for lst in zip(*coords)]

def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    a, b = tee(iterable)
    # Advance the second copy by one so the zip yields consecutive pairs.
    b.next()
    return izip(a, b)

def distance2(vector1, vector2):
    """Return the squared distance between vector1 and vector2."""
    return sum((x*x) for x in map(operator.sub, vector1, vector2))

def combinations_no_repetition(seq, k):
    """Yield combinations (tuples) of k elements from seq without repetition."""
    if k > 0:
        for index, x in enumerate(seq):
            for y in combinations_no_repetition(seq[index+1:], k-1):
                yield (x,)+y
    else:
        yield ()

def union_sets(sets):
    """Return union of sets."""
    return reduce(set.union, sets)

def segment(seq, k):
    """Yield every split of seq into k consecutive, non-empty groups."""
    if k > 1:
        for length in range(1, (len(seq)-k+1)+1):
            for x in segment(seq[length:], k-1):
                yield (seq[:length],) + x
    else:
        yield (seq,)

def run(command, inputdata=None):
    """Run a command and return its standard output.

    Raises AssertionError when the command exits with a non-zero status.
    """
    pipe = subprocess.PIPE
    popen = subprocess.Popen(command, stdout=pipe, stderr=pipe)
    outputdata = popen.communicate(inputdata)[0]
    assert (popen.returncode == 0), "Error running: %s" % command
    return outputdata

def ocr(image):
    """OCR an image with the external tesseract command and return the text."""
    temp_tif = tempfile.NamedTemporaryFile(suffix=".tif")
    temp_txt = tempfile.NamedTemporaryFile(suffix=".txt")
    image.save(temp_tif, format="TIFF")
    # The output name is passed without its extension; presumably tesseract
    # appends ".txt" itself, so the result lands in temp_txt -- TODO confirm.
    run(["tesseract", temp_tif.name, os.path.splitext(temp_txt.name)[0]])
    return open(temp_txt.name).read()

def histogram(it, reverse=False):
    """Return (value, count) pairs for the elements of it, sorted by count
    (ascending unless reverse is true)."""
    pairs = ((value, len(list(grp))) for (value, grp) in groupby(sorted(it)))
    return sorted(pairs, key=lambda (k, v): v, reverse=reverse)

def get_pair_inclussion(seq, value, pred=None):
    """Return the first consecutive pair (val1, val2) of seq such that
    pred(val1) <= value <= pred(val2); return None when there is none.

    pred defaults to the identity function.
    """
    if pred is None:
        pred = lambda x: x
    for val1, val2 in pairwise(seq):
        if pred(val1) <= value <= pred(val2):
            return val1, val2

# Generic PIL functions

def smooth(image0, value):
    """Return a copy of image0 where every pixel equal to value is also
    spread to its right and lower neighbours."""
    image = image0.copy()
    # Read from the untouched original so spreading does not cascade.
    ipimage0 = image0.load()
    ipimage = image.load()
    width, height = image.size
    for x, y in iter_image(image):
        if ipimage0[x, y] == value:
            if x+1 < width:
                ipimage[x+1, y] = value
            if y+1 < height:
                ipimage[x, y+1] = value
    return image

def merge_image_with_pixels(image0, pixels, value):
    """Set pixels in image to given value and return new image."""
    image = image0.copy()
    ipimage = image.load()
    for (x, y) in pixels:
        ipimage[x, y] = value
    return image

def floodfill_image(image0, (x, y), fill_color, threshold=0):
    """Flood fill image with fill color in given position.

    Neighbours (4-connected) are filled while their value differs from the
    starting pixel's value by at most threshold.

    Return a tuple with filled image and the set of positions of filled
    pixels. The input image is not modified.

    See http://mail.python.org/pipermail/image-sig/2005-September/003559.html
    """
    image = image0.copy()
    width, height = image.size
    def is_within((x, y)):
        """Return True if (x, y) is inside image"""
        return (0 <= x < width and 0 <= y < height)
    ipimage = image.load()
    background_value = ipimage[x, y]
    ipimage[x, y] = fill_color
    edge = [(x, y)]
    filled = set(edge)
    # Expand the frontier one ring at a time until nothing new is reached.
    while edge:
        newedge = []
        for x, y in edge:
            for s, t in ((x+1, y), (x-1, y), (x, y+1), (x, y-1)):
                if (s, t) in filled or not is_within((s, t)):
                    continue
                pixel = ipimage[s, t]
                if abs(pixel - background_value) <= threshold:
                    ipimage[s, t] = fill_color
                    newedge.append((s, t))
        filled.update(newedge)
        edge = newedge
    return image, filled

def iter_image(image):
    """Yield (x, y) pairs to walk an image positions."""
    w, h = image.size
    return ((x, y) for x in range(w) for y in range(h))

def get_zones(image, seen0, value, minpixels=1):
    """
    Scan an image and yield groups (sets) of connected pixels having the
    given value.

    Ignore pixels already found in seen0 (which is not modified).
    Ignore groups that do not have more than minpixels pixels.
    """
    seen = seen0.copy()
    pixels = image.load()
    for x, y in iter_image(image):
        if (x, y) not in seen and pixels[x, y] == value:
            # Only the filled positions are needed, not the filled image.
            filled = floodfill_image(image, (x, y), 50)[1]
            if len(filled) > minpixels:
                seen.update(filled)
                yield filled

def new_image_from_pixels(pixels, value):
    """Return a new white ("L" mode, 255) image just large enough for the
    group of pixels, with those pixels set to value (offset removed)."""
    xs, ys = zip(*pixels)
    x1, y1 = min(xs), min(ys)
    x2, y2 = max(xs), max(ys)
    image = Image.new("L", (x2-x1+1, y2-y1+1), 255)
    ipimage = image.load()
    for (x, y) in pixels:
        ipimage[x-x1, y-y1] = value
    return image

def join_images_horizontal(images):
    """Paste images side by side, vertically centered, on a white image
    whose width is the sum of widths and height the maximum height."""
    width = sum(i.size[0] for i in images)
    height = max(i.size[1] for i in images)
    himage = Image.new("L", (width, height), 255)
    x = 0
    for image in images:
        w, h = image.size
        himage.paste(image, (x, (height - h)/2))
        x += w
    return himage

### Megaupload captcha decoder functions

def filter_word(word0):
    """Check if a word is a valid captcha (make also some basic corrections).

    Return the corrected 4-character word (3 uppercase letters + 1 digit),
    or None when the word cannot be a captcha.
    """
    def string2dict(s):
        """Convert pairs of chars in string to dictionary.

        Example: ('AB CD') -> {'A': 'B'}, {'C': 'D'}."""
        return dict(tuple(pair) for pair in s.split())
    # Characters that are commonly confused by the OCR step.
    str_digit_to_letter = "1T 2Z 4A 5S 6G 7T 8B"
    str_letter_to_letter = "{C (C [C IC"
    allowed_chars = string.uppercase + string.digits
    digit_to_letter = string2dict(str_digit_to_letter)
    letter_to_letter = string2dict(str_letter_to_letter)
    letter_to_digit = dict((v, k) for (k, v) in digit_to_letter.iteritems())
    wordlst1 = list(word0.upper().replace(" ", ""))
    if len(wordlst1) != 4:
        return
    # First three positions must be letters, the last one a digit.
    wordlst2 = [replace_chars(replace_chars(w, digit_to_letter),
                              letter_to_letter)
                for w in wordlst1[:3]] + \
        [replace_chars(wordlst1[3], letter_to_digit)]
    wordlst = [c for c in wordlst2 if c in allowed_chars]
    if len(wordlst) == 4 and (wordlst[0] in string.uppercase and
                              wordlst[1] in string.uppercase and
                              wordlst[2] in string.uppercase and
                              wordlst[3] in string.digits):
        return "".join(wordlst)

def get_error(pixels_list, image):
    """Return the error of the given pixel groups against the positions
    where the captcha characters are expected.

    The expected center of character n is ((1.5*n+1) * width/6, height/2);
    the error is the sum of squared distances to those centers.
    """
    width, height = image.size
    gap_width = width / 6.0
    def error_for_pixels(pixels, n):
        """Return error for pixels (character n in captcha)."""
        com_x, com_y = center_of_mass(pixels)
        return distance2((com_x, com_y),
                         ((1.5*n+1)*gap_width, (height/2.0)))
    return sum(error_for_pixels(pxls, n)
               for n, pxls in enumerate(pixels_list))

def build_candidates(characters4_pixels_list, uncertain_pixels, rotation=22):
    """Yield word candidates built from character groups and every subset
    of the uncertain groups.

    Each uncertain group that is selected is added to both characters it
    lies between; characters are then rotated by +/- rotation degrees
    (alternating by position), joined, smoothed and sent to OCR. Only texts
    accepted by filter_word are yielded.
    """
    for plindex, characters4_pixels in enumerate(characters4_pixels_list):
        logging.debug("Generating words (%d) %d/%d",
                      2**len(uncertain_pixels), plindex+1,
                      len(characters4_pixels_list))
        for length in range(len(uncertain_pixels)+1):
            for groups in combinations_no_repetition(uncertain_pixels, length):
                # Work on copies so each combination starts from scratch.
                characters4_pixels_test = [x.copy() for x in characters4_pixels]
                for pixels in groups:
                    pair = get_pair_inclussion(
                        characters4_pixels_test,
                        center_of_mass(pixels)[0],
                        pred=lambda x: center_of_mass(x)[0])
                    if not pair:
                        continue
                    char1, char2 = pair
                    char1.update(pixels)
                    char2.update(pixels)
                def rotate_character(pixels, index):
                    """Rotate captcha character in position index."""
                    image = new_image_from_pixels(pixels, 1)
                    angle = rotation * (+1 if (index % 2 == 0) else -1)
                    rotated_image = image.rotate(angle, expand=True)
                    # Character pixels (1) become black, the rest white.
                    return rotated_image.point(lambda x: 0 if x == 1 else 255)
                images = [rotate_character(pixels, cindex)
                          for cindex, pixels
                          in enumerate(characters4_pixels_test)]
                clean_image = smooth(join_images_horizontal(images), 0)
                text = ocr(clean_image).strip()
                filtered_text = filter_word(text)
                #logging.debug("%s -> %s", text, filtered_text)
                if filtered_text:
                    yield filtered_text

def decode_megaupload_captcha(imagedata, maxiterations=1):
    """Decode a Megaupload captcha image.

    Expected 4 letters (LETTER LETTER LETTER DIGIT), rotated and overlapped.

    imagedata is anything Image.open accepts; maxiterations limits how many
    of the best-scoring character segmentations are tried. Return the decoded
    word, or None when fewer than 4 zones are found or no candidate passes.
    """
    original = Image.open(imagedata)
    # Get background zone
    width, height = original.size
    # Add a 1-pixel white frame so the background is one connected zone
    # reachable from (0, 0).
    image = Image.new("L", (width+2, height+2), 255)
    image.paste(original, (1, 1))
    background_pixels = floodfill_image(image, (0, 0), 155)[1]
    logging.debug("Background pixels: %d", len(background_pixels))
    # Get characters zones
    characters_pixels = sorted(get_zones(image, background_pixels, 0, 10),
                               key=center_of_mass)
    logging.debug("Characters: %d - %s", len(characters_pixels),
                  [len(x) for x in characters_pixels])
    if len(characters_pixels) < 4:
        logging.error("Need at least 4 characters zones in image (%d found)",
                      len(characters_pixels))
        return
    # Every way of grouping the zones into 4 characters, best fit first.
    characters_pixels_list0 = [[union_sets(sets) for sets in x]
                               for x in segment(characters_pixels, 4)]
    characters4_pixels_list = sorted(
        characters_pixels_list0,
        key=lambda pixels_list: get_error(pixels_list, image))[:maxiterations]
    # Get uncertain zones
    seen = union_sets([background_pixels] + characters_pixels)
    max_uncertain_groups = 6
    uncertain_pixels = list(sorted(get_zones(image, seen, 255, 20),
                                   key=len))[:max_uncertain_groups]
    logging.debug("Uncertain groups: %d - %s", len(uncertain_pixels),
                  [len(pixels) for pixels in uncertain_pixels])
    # Build candidates
    candidates = build_candidates(characters4_pixels_list, uncertain_pixels)
    # Return best decoded word: per position, the most frequent character
    candidates_histogram = [histogram(charpos, reverse=True)
                            for charpos
                            in zip(*[list(candidate)
                                     for candidate in candidates])]
    if not candidates_histogram:
        logging.warning("No word candidates")
        return
    logging.info("Best words: %s", candidates_histogram)
    best = [x[0][0] for x in candidates_histogram]
    return "".join(best)

def set_verbose_level(verbose_level):
    """Set verbose level for logging.

    See _VERBOSE_LEVELS constant for allowed values; out-of-range values
    are clamped."""
    level = _VERBOSE_LEVELS[max(0, min(verbose_level, len(_VERBOSE_LEVELS)-1))]
    logging.basicConfig(level=level, stream=sys.stderr,
                        format='%(levelname)s: %(message)s')

def main(args):
    """Main function for megaupload captcha decoder.

    Write the decoded captcha to standard output. Return 1 when no image
    argument is given or the captcha cannot be decoded, None otherwise.
    """
    import optparse
    usage = """usage: megaupload_captcha [OPTIONS] [IMAGE_FILE] Decode Megaupload captcha."""
    parser = optparse.OptionParser(usage)
    parser.add_option('-v', '--verbose', dest='verbose_level',
                      action="count", default=None,
                      help='Increate verbose level (0: CRITICAL ... 4: DEBUG)')
    parser.add_option('-i', '--max-iterations', dest='max_iterations',
                      default=1, metavar='NUM', type='int',
                      help='Maximum iterations for characters agrupations')
    options, args0 = parser.parse_args(args)
    if not args0:
        parser.print_help()
        return 1
    set_verbose_level((1 if options.verbose_level is None
                       else options.verbose_level))
    # Exactly one positional argument is expected ("-" means stdin).
    filename, = args0
    stream = (sys.stdin if filename == "-" else open(filename))
    logging.debug("Maximum iterations: %s" % options.max_iterations)
    captcha = decode_megaupload_captcha(StringIO(stream.read()),
                                        options.max_iterations)
    if not captcha:
        logging.error("Cannot decode captcha image")
        return 1
    sys.stdout.write(captcha+"\n")

if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))