/* * Copyright (C) 2004, 2005 Jean-Yves Lefort * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Jean-Yves Lefort nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include "config.h" #include #include #include #include "translate-service-private.h" #include "translate.h" #include "translate-pair-private.h" #define LOCK(session) g_mutex_lock((session)->priv->mutex) #define UNLOCK(session) g_mutex_unlock((session)->priv->mutex) enum { PROP_0, PROP_SERVICES, PROP_PAIRS, PROP_MAX_THREADS, PROP_MAX_RETRIES }; struct _TranslateSessionPrivate { GMutex *mutex; GSList *services; GSList *pairs; unsigned int max_threads; int max_retries; }; typedef struct { GMutex *mutex; GCond *progress_cond; TranslateSession *session; GSList *services; GSList *chunks; const char *from; const char *to; GError *err; } TranslateInfo; typedef struct { TranslateInfo *info; const char *chunk; char *translated; double progress; } TranslateChunkInfo; static GObjectClass *parent_class = NULL; static void translate_session_register_type (GType *type); static void translate_session_class_init (TranslateSessionClass *class); static void translate_session_init (TranslateSession *session); static void translate_session_finalize (GObject *object); static void translate_session_set_property (GObject *object, unsigned int prop_id, const GValue *value, GParamSpec *pspec); static void translate_session_get_property (GObject *object, unsigned int prop_id, GValue *value, GParamSpec *pspec); static void translate_session_translate_thread (gpointer data, gpointer user_data); static gboolean translate_session_translate_progress_cb (double progress, gpointer user_data); static GSList *translate_session_split (const char *text, unsigned int max_chunk_len); GType translate_session_get_type (void) { static GType type; static GOnce once = G_ONCE_INIT; g_once(&once, (GThreadFunc) translate_session_register_type, &type); return type; } static void translate_session_register_type (GType *type) { static const GTypeInfo info = { sizeof(TranslateSessionClass), NULL, NULL, (GClassInitFunc) translate_session_class_init, NULL, NULL, sizeof(TranslateSession), 0, (GInstanceInitFunc) 
translate_session_init }; *type = g_type_register_static(G_TYPE_OBJECT, "TranslateSession", &info, 0); } static void translate_session_class_init (TranslateSessionClass *class) { GObjectClass *object_class = G_OBJECT_CLASS(class); g_type_class_add_private(class, sizeof(TranslateSessionPrivate)); parent_class = g_type_class_peek_parent(class); object_class->finalize = translate_session_finalize; object_class->set_property = translate_session_set_property; object_class->get_property = translate_session_get_property; /* keep the default values in sync with the setters documentation */ g_object_class_install_property(object_class, PROP_SERVICES, g_param_spec_pointer("services", _("Services"), _("The list of services used by this session"), G_PARAM_READABLE | G_PARAM_WRITABLE | G_PARAM_CONSTRUCT)); g_object_class_install_property(object_class, PROP_PAIRS, g_param_spec_pointer("pairs", _("Pairs"), _("The list of pairs this session supports"), G_PARAM_READABLE)); g_object_class_install_property(object_class, PROP_MAX_THREADS, g_param_spec_uint("max-threads", _("Maximum number of threads"), _("The maximum number of translation threads to use"), 0, G_MAXINT, 8, G_PARAM_READABLE | G_PARAM_WRITABLE | G_PARAM_CONSTRUCT)); g_object_class_install_property(object_class, PROP_MAX_RETRIES, g_param_spec_int("max-retries", _("Maximum number of retries"), _("The maximum number of times to retry a translation of the same chunk (-1 for unlimited)"), -1, G_MAXINT, 3, G_PARAM_READABLE | G_PARAM_WRITABLE | G_PARAM_CONSTRUCT)); } static void translate_session_init (TranslateSession *session) { session->priv = G_TYPE_INSTANCE_GET_PRIVATE(session, TRANSLATE_TYPE_SESSION, TranslateSessionPrivate); session->priv->mutex = g_mutex_new(); } static void translate_session_finalize (GObject *object) { TranslateSession *session = TRANSLATE_SESSION(object); g_slist_foreach(session->priv->services, (GFunc) g_object_unref, NULL); g_slist_free(session->priv->services); 
g_slist_foreach(session->priv->pairs, (GFunc) g_object_unref, NULL); g_slist_free(session->priv->pairs); g_mutex_free(session->priv->mutex); parent_class->finalize(object); } static void translate_session_set_property (GObject *object, unsigned int prop_id, const GValue *value, GParamSpec *pspec) { TranslateSession *session = TRANSLATE_SESSION(object); switch (prop_id) { case PROP_SERVICES: { GSList *l; LOCK(session); g_slist_foreach(session->priv->services, (GFunc) g_object_unref, NULL); g_slist_free(session->priv->services); session->priv->services = g_slist_copy(g_value_get_pointer(value)); g_slist_foreach(session->priv->services, (GFunc) g_object_ref, NULL); g_slist_foreach(session->priv->pairs, (GFunc) g_object_unref, NULL); g_slist_free(session->priv->pairs); session->priv->pairs = NULL; for (l = session->priv->services; l != NULL; l = l->next) { TranslateService *service = l->data; session->priv->pairs = translate_pairs_merge(session->priv->pairs, translate_service_get_pairs(service)); } UNLOCK(session); g_object_notify(object, "pairs"); } break; case PROP_MAX_THREADS: LOCK(session); session->priv->max_threads = g_value_get_uint(value); UNLOCK(session); break; case PROP_MAX_RETRIES: LOCK(session); session->priv->max_retries = g_value_get_int(value); UNLOCK(session); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); break; } } /** * translate_session_set_services: * @session: a session. * @services: a list of #TranslateService objects to use. * * Sets the services used by @session. Default value: %NULL. * * The list of services determines the available language pairs (see * translate_session_get_pairs()). The order of the list is important, * as translate_session_translate_text() and * translate_session_translate_web_page() iterate over the list until * the translation can be performed. 
**/ void translate_session_set_services (TranslateSession *session, const GSList *services) { g_return_if_fail(TRANSLATE_IS_SESSION(session)); g_object_set(G_OBJECT(session), "services", services, NULL); } /** * translate_session_set_max_threads: * @session: a session. * @max_threads: a maximum number of threads. * * Sets the maximum number of concurrent threads to use for * translating text. Default value: 8. **/ void translate_session_set_max_threads (TranslateSession *session, unsigned int max_threads) { g_return_if_fail(TRANSLATE_IS_SESSION(session)); g_object_set(G_OBJECT(session), "max-threads", max_threads, NULL); } /** * translate_session_set_max_retries: * @session: a session. * @max_retries: a maximum number of retries. * * Sets the maximum number of retries per text chunk. Default value: * 3. **/ void translate_session_set_max_retries (TranslateSession *session, unsigned int max_retries) { g_return_if_fail(TRANSLATE_IS_SESSION(session)); g_object_set(G_OBJECT(session), "max-retries", max_retries, NULL); } static void translate_session_get_property (GObject *object, unsigned int prop_id, GValue *value, GParamSpec *pspec) { TranslateSession *session = TRANSLATE_SESSION(object); switch (prop_id) { case PROP_SERVICES: LOCK(session); g_slist_foreach(session->priv->services, (GFunc) g_object_ref, NULL); g_value_set_pointer(value, g_slist_copy(session->priv->services)); UNLOCK(session); break; case PROP_PAIRS: LOCK(session); g_slist_foreach(session->priv->pairs, (GFunc) g_object_ref, NULL); g_value_set_pointer(value, g_slist_copy(session->priv->pairs)); UNLOCK(session); break; case PROP_MAX_THREADS: LOCK(session); g_value_set_uint(value, session->priv->max_threads); UNLOCK(session); break; case PROP_MAX_RETRIES: LOCK(session); g_value_set_int(value, session->priv->max_retries); UNLOCK(session); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); break; } } /** * translate_session_get_services: * @session: a session. 
* * Gets the services used by session. When no longer needed, the list * should be freed with: * * * g_slist_foreach(list, (GFunc) g_object_unref, NULL); * g_slist_free(list); * * * Return value: the list of #TranslateService objects used by * @session. **/ GSList * translate_session_get_services (TranslateSession *session) { GSList *services; g_return_val_if_fail(TRANSLATE_IS_SESSION(session), NULL); g_object_get(G_OBJECT(session), "services", &services, NULL); return services; } /** * translate_session_get_pairs: * @session: a session. * * Gets the list of language pairs available for translations. When no longer * needed, the list should be freed with: * * * g_slist_foreach(list, (GFunc) g_object_unref, NULL); * g_slist_free(list); * * * Return value: the list of #TranslatePair objects available for * translations. **/ GSList * translate_session_get_pairs (TranslateSession *session) { GSList *pairs; g_return_val_if_fail(TRANSLATE_IS_SESSION(session), NULL); g_object_get(G_OBJECT(session), "pairs", &pairs, NULL); return pairs; } /** * translate_session_get_max_threads: * @session: a session. * * Gets the maximum number of concurrent threads to use for * translating text. * * Return value: the maximum number of threads. **/ unsigned int translate_session_get_max_threads (TranslateSession *session) { unsigned int max_threads; g_return_val_if_fail(TRANSLATE_IS_SESSION(session), 0); g_object_get(G_OBJECT(session), "max-threads", &max_threads, NULL); return max_threads; } /** * translate_session_get_max_retries: * @session: a session. * * Gets the maximum number of retries per text chunk. * * Return value: the maximum number of retries. 
**/ int translate_session_get_max_retries (TranslateSession *session) { int max_retries; g_return_val_if_fail(TRANSLATE_IS_SESSION(session), 0); g_object_get(G_OBJECT(session), "max-retries", &max_retries, NULL); return max_retries; } static GSList * translate_session_get_services_for_translation (TranslateSession *session, TranslatePairFlags flags, const char *from, const char *to) { GSList *services = NULL; GSList *l; g_return_val_if_fail(TRANSLATE_IS_SESSION(session), NULL); g_return_val_if_fail(from != NULL, NULL); g_return_val_if_fail(to != NULL, NULL); for (l = session->priv->services; l != NULL; l = l->next) { TranslateService *service = l->data; const GSList *pairs; TranslatePair *pair; pairs = translate_service_get_pairs(service); pair = translate_pairs_find(pairs, from, to); if (pair && translate_pair_get_flags(pair) & flags) services = g_slist_append(services, g_object_ref(service)); } return services; } /** * translate_session_translate_text: * @session: a session. * @text: a nul-terminated string. * @from: a RFC 3066 language tag. * @to: a RFC 3066 language tag. * @progress_func: a function to call when progressing, or %NULL. * @user_data: data to pass to @progress_func, or %NULL. * @err: a location to report errors, or %NULL. Any of the errors in * #TranslateSessionError, #TranslateError or other domains may occur. * * Translates text from the language @from into the language @to. * * If @progress_func is specified, it may be * called periodically to report progress. If, when called, * @progress_func returns %FALSE, the translation is cancelled as soon * as possible, %NULL is returned, and @err is set to an error of * domain #TRANSLATE_ERROR and code #TRANSLATE_ERROR_CANCELLED. * * Return value: the translated text on success, or %NULL on failure * (in such case @err is set). The returned string should be freed * when no longer needed. 
**/ char * translate_session_translate_text (TranslateSession *session, const char *text, const char *from, const char *to, TranslateProgressFunc progress_func, gpointer user_data, GError **err) { GSList *chunks; TranslateInfo info; int n_chunks; unsigned int max_threads; GThreadPool *pool; GSList *l; unsigned int max_chunk_len = 0; char *translated = NULL; g_return_val_if_fail(TRANSLATE_IS_SESSION(session), NULL); g_return_val_if_fail(text != NULL, NULL); g_return_val_if_fail(from != NULL, NULL); g_return_val_if_fail(to != NULL, NULL); LOCK(session); info.services = translate_session_get_services_for_translation(session, TRANSLATE_PAIR_TEXT, from, to); max_threads = session->priv->max_threads; UNLOCK(session); if (! info.services) { g_set_error(err, TRANSLATE_SESSION_ERROR, TRANSLATE_SESSION_ERROR_NO_SERVICE, _("no service provides %s to %s text translation"), translate_get_language_name(from), translate_get_language_name(to)); return NULL; } /* compute max_chunk_len */ for (l = info.services; l != NULL; l = l->next) { TranslateService *service = l->data; unsigned int service_max_chunk_len = translate_service_get_max_chunk_len(service); if (service_max_chunk_len != 0 && (max_chunk_len == 0 || service_max_chunk_len < max_chunk_len)) max_chunk_len = service_max_chunk_len; } chunks = translate_session_split(text, max_chunk_len); info.mutex = g_mutex_new(); info.progress_cond = progress_func ? g_cond_new() : NULL; info.session = session; info.chunks = NULL; info.from = from; info.to = to; info.err = NULL; n_chunks = g_slist_length(chunks); pool = g_thread_pool_new(translate_session_translate_thread, &info, MIN(max_threads, n_chunks), TRUE, err); if (! 
pool) goto end; for (l = chunks; l != NULL; l = l->next) { TranslateChunkInfo *chunk_info; GError *tmp_err = NULL; chunk_info = g_new0(TranslateChunkInfo, 1); chunk_info->info = &info; chunk_info->chunk = l->data; info.chunks = g_slist_append(info.chunks, chunk_info); g_thread_pool_push(pool, chunk_info, &tmp_err); if (tmp_err) { /* inform threads that an error has occurred */ g_mutex_lock(info.mutex); if (info.err) g_error_free(tmp_err); else info.err = tmp_err; g_mutex_unlock(info.mutex); /* finish */ goto pool_end; } } if (info.progress_cond) { g_mutex_lock(info.mutex); while (g_thread_pool_unprocessed(pool) > 0) { double progress = 0; GSList *l; int n_chunks; g_cond_wait(info.progress_cond, info.mutex); if (info.err) break; for (l = info.chunks, n_chunks = 0; l != NULL; l = l->next, n_chunks++) { TranslateChunkInfo *chunk_info = l->data; progress += chunk_info->progress; } progress /= n_chunks; if (! progress_func(progress, user_data)) { info.err = g_error_new(TRANSLATE_ERROR, TRANSLATE_ERROR_CANCELLED, _("cancelled by user")); break; } } g_mutex_unlock(info.mutex); } pool_end: g_thread_pool_free(pool, FALSE, TRUE); if (info.err) g_propagate_error(err, info.err); else { GString *string; string = g_string_new(NULL); for (l = info.chunks; l != NULL; l = l->next) { TranslateChunkInfo *chunk_info = l->data; g_string_append(string, chunk_info->translated); } translated = g_string_free(string, FALSE); } for (l = info.chunks; l != NULL; l = l->next) { TranslateChunkInfo *chunk_info = l->data; g_free(chunk_info->translated); g_free(chunk_info); } g_slist_free(info.chunks); end: g_slist_foreach(chunks, (GFunc) g_free, NULL); g_slist_free(chunks); g_mutex_free(info.mutex); if (info.progress_cond) g_cond_free(info.progress_cond); g_slist_foreach(info.services, (GFunc) g_object_unref, NULL); g_slist_free(info.services); return translated; } static void translate_session_translate_thread (gpointer data, gpointer user_data) { int max_retries; TranslateChunkInfo *chunk_info = 
data; TranslateInfo *info = user_data; int i; LOCK(info->session); max_retries = info->session->priv->max_retries; UNLOCK(info->session); for (i = 0; (i < max_retries || max_retries == -1) && ! chunk_info->translated; i++) { GSList *l; for (l = info->services; l != NULL && ! chunk_info->translated; l = l->next) { TranslateService *service = l->data; gboolean ret; GError *tmp_err = NULL; g_mutex_lock(info->mutex); if (info->err) { ret = TRUE; if (info->progress_cond) g_cond_signal(info->progress_cond); } else ret = FALSE; g_mutex_unlock(info->mutex); if (ret) return; chunk_info->translated = translate_service_translate_text(service, chunk_info->chunk, info->from, info->to, info->progress_cond ? translate_session_translate_progress_cb : NULL, info->progress_cond ? chunk_info : NULL, &tmp_err); if (! chunk_info->translated) { if (g_error_matches(tmp_err, TRANSLATE_ERROR, TRANSLATE_ERROR_CANCELLED)) { g_mutex_lock(info->mutex); if (info->err) g_error_free(tmp_err); else g_propagate_error(&info->err, tmp_err); if (info->progress_cond) g_cond_signal(info->progress_cond); g_mutex_unlock(info->mutex); return; } else { g_warning(_("unable to translate chunk using %s: %s"), translate_service_get_nick(service), tmp_err->message); g_error_free(tmp_err); } } } } g_mutex_lock(info->mutex); if (chunk_info->translated) { if (info->progress_cond) { chunk_info->progress = 1.0; g_cond_signal(info->progress_cond); } } else { if (! 
info->err) info->err = g_error_new(TRANSLATE_SESSION_ERROR, TRANSLATE_SESSION_ERROR_NO_SERVICE, _("no service could translate chunk")); if (info->progress_cond) g_cond_signal(info->progress_cond); } g_mutex_unlock(info->mutex); } static gboolean translate_session_translate_progress_cb (double progress, gpointer user_data) { TranslateChunkInfo *chunk_info = user_data; gboolean cont; g_mutex_lock(chunk_info->info->mutex); if (chunk_info->info->err) cont = FALSE; else { if (progress >= 0) chunk_info->progress = progress; g_cond_signal(chunk_info->info->progress_cond); cont = TRUE; } g_mutex_unlock(chunk_info->info->mutex); return cont; } static GSList * translate_session_split (const char *text, unsigned int max_chunk_len) { GSList *chunks = NULL; GString *chunk; int chunk_len; /* in UTF-8 chars */ g_return_val_if_fail(text != NULL, NULL); if (max_chunk_len == 0) /* unlimited */ return g_slist_append(chunks, g_strdup(text)); chunk = g_string_new(NULL); chunk_len = 0; while (*text) { char *sentence_end; /* ptr to char following end of sentence */ int sentence_len; /* in UTF-8 chars */ #define SENTENCE_TERMINATORS ".?!" #define CLAUSE_TERMINATORS ",;:\"'-) " sentence_end = translate_utf8_strpbrk(text, -1, SENTENCE_TERMINATORS); sentence_end = sentence_end ? 
sentence_end + 1 : strchr(text, 0); sentence_len = g_utf8_strlen(text, sentence_end - text); if (sentence_len + chunk_len > max_chunk_len) { if (chunk_len > 0) { chunks = g_slist_append(chunks, g_string_free(chunk, FALSE)); chunk = g_string_new(NULL); chunk_len = 0; } if (sentence_len > max_chunk_len) { /* still too long, fallback to clause */ char *chunk_end; char *clause_end; chunk_end = g_utf8_offset_to_pointer(text, max_chunk_len); clause_end = translate_utf8_strrpbrk(text, chunk_end - text, CLAUSE_TERMINATORS); if (clause_end) { sentence_end = clause_end + 1; sentence_len = g_utf8_strlen(text, sentence_end - text); } else { /* didn't work, just use max_chunk_len */ sentence_end = chunk_end; sentence_len = max_chunk_len; } } } g_string_append_len(chunk, text, sentence_end - text); text = sentence_end; chunk_len += sentence_len; } if (*chunk->str || ! chunks) chunks = g_slist_append(chunks, g_string_free(chunk, FALSE)); else g_string_free(chunk, TRUE); return chunks; } /** * translate_session_translate_web_page: * @session: a session. * @url: an URL. * @from: a RFC 3066 language tag. * @to: a RFC 3066 language tag. * @progress_func: a function to call when progressing, or %NULL. * @user_data: data to pass to @progress_func, or %NULL. * @err: a location to report errors, or %NULL. Any of the errors in * #TranslateSessionError, #TranslateError or other domains may occur. * * Translates a web page from the language @from into the language * @to. * * If @progress_func is specified, it may be * called periodically to report progress. If, when called, * @progress_func returns %FALSE, the translation is cancelled as soon * as possible, %NULL is returned, and @err is set to an error of * domain #TRANSLATE_ERROR and code #TRANSLATE_ERROR_CANCELLED. * * Return value: an URL pointing to the translated web page on * success, or %NULL on failure (in such case @err is set). The * returned string should be freed when no longer needed. 
**/
char *
translate_session_translate_web_page (TranslateSession *session,
                                      const char *url,
                                      const char *from,
                                      const char *to,
                                      TranslateProgressFunc progress_func,
                                      gpointer user_data,
                                      GError **err)
{
  GSList *services;
  char *translated_url = NULL;
  GSList *l;
  gboolean cancelled = FALSE;

  g_return_val_if_fail(TRANSLATE_IS_SESSION(session), NULL);
  g_return_val_if_fail(url != NULL, NULL);
  g_return_val_if_fail(from != NULL, NULL);
  g_return_val_if_fail(to != NULL, NULL);

  LOCK(session);
  services = translate_session_get_services_for_translation(session,
                                                            TRANSLATE_PAIR_WEB_PAGE,
                                                            from,
                                                            to);
  UNLOCK(session);

  if (! services)
    {
      g_set_error(err,
                  TRANSLATE_SESSION_ERROR,
                  TRANSLATE_SESSION_ERROR_NO_SERVICE,
                  _("no service provides %s to %s web page translation"),
                  translate_get_language_name(from),
                  translate_get_language_name(to));
      return NULL;
    }

  /*
   * Try the services in order until one succeeds.  A cancellation ends the
   * loop as well: trying the next service after the user cancelled could
   * set @err a second time, or return an URL while @err is set.
   */
  for (l = services; l != NULL && ! translated_url && ! cancelled; l = l->next)
    {
      TranslateService *service = l->data;
      GError *tmp_err = NULL;

      translated_url = translate_service_translate_web_page(service,
                                                            url,
                                                            from,
                                                            to,
                                                            progress_func,
                                                            user_data,
                                                            &tmp_err);
      if (! translated_url)
        {
          if (g_error_matches(tmp_err, TRANSLATE_ERROR, TRANSLATE_ERROR_CANCELLED))
            {
              g_propagate_error(err, tmp_err);
              cancelled = TRUE;
            }
          else
            {
              /* a service may fail without filling in the error */
              g_warning(_("unable to translate web page using %s: %s"),
                        translate_service_get_nick(service),
                        tmp_err ? tmp_err->message : _("unknown error"));
              g_clear_error(&tmp_err);
            }
        }
    }

  g_slist_foreach(services, (GFunc) g_object_unref, NULL);
  g_slist_free(services);

  if (! translated_url && ! cancelled)
    g_set_error(err,
                TRANSLATE_SESSION_ERROR,
                TRANSLATE_SESSION_ERROR_NO_SERVICE,
                _("no service could translate web page"));

  return translated_url;
}

/**
 * translate_session_new:
 * @services: a list of #TranslateService objects to use. See
 * translate_session_set_services().
 *
 * Creates a new translation session, which will use @services for
 * performing translations.
 *
 * Return value: a new translation session.
**/ TranslateSession * translate_session_new (const GSList *services) { return g_object_new(TRANSLATE_TYPE_SESSION, "services", services, NULL); } GQuark translate_session_error_quark (void) { return g_quark_from_static_string("translate-session-error"); }