Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
L
LibreTranslate
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Riße, Matthias
LibreTranslate
Commits
c29cecbb
Commit
c29cecbb
authored
3 years ago
by
PalmerAL
Browse files
Options
Downloads
Patches
Plain Diff
improve auto-detect for batch requests with multiple languages
parent
6ec94ee9
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
app/app.py
+38
-20
38 additions, 20 deletions
app/app.py
app/language.py
+8
-5
8 additions, 5 deletions
app/language.py
with
46 additions
and
25 deletions
app/app.py
+
38
−
20
View file @
c29cecbb
...
...
@@ -360,43 +360,61 @@ def create_app(args):
)
if
source_lang
==
"
auto
"
:
candidate_langs
=
detect_languages
(
q
)
if
args
.
debug
:
print
(
candidate_langs
)
source_lang
=
candidate_langs
[
0
][
"
language
"
]
source_langs
=
[]
if
batch
:
auto_detect_texts
=
q
else
:
auto_detect_texts
=
[
q
]
overall_candidates
=
detect_languages
(
q
)
for
text_to_check
in
auto_detect_texts
:
if
len
(
text_to_check
)
>
40
:
candidate_langs
=
detect_languages
(
text_to_check
)
else
:
# Unable to accurately detect languages for short texts
candidate_langs
=
overall_candidates
source_langs
.
append
(
candidate_langs
[
0
][
"
language
"
])
if
args
.
debug
:
print
(
text_to_check
,
candidate_langs
)
print
(
"
Auto detected: %s
"
%
candidate_langs
[
0
][
"
language
"
])
else
:
if
batch
:
source_langs
=
[
source_lang
for
text
in
q
]
else
:
source_langs
=
[
source_lang
]
if
args
.
debug
:
print
(
"
Auto detected: %s
"
%
source_lang
)
src_langs
=
[
next
(
iter
([
l
for
l
in
languages
if
l
.
code
==
source_lang
]),
None
)
for
source_lang
in
source_langs
]
for
idx
,
lang
in
enumerate
(
src_langs
):
if
lang
is
None
:
abort
(
400
,
description
=
"
%s is not supported
"
%
source_langs
[
idx
])
src_lang
=
next
(
iter
([
l
for
l
in
languages
if
l
.
code
==
source_lang
]),
None
)
tgt_lang
=
next
(
iter
([
l
for
l
in
languages
if
l
.
code
==
target_lang
]),
None
)
if
src_lang
is
None
:
abort
(
400
,
description
=
"
%s is not supported
"
%
source_lang
)
if
tgt_lang
is
None
:
abort
(
400
,
description
=
"
%s is not supported
"
%
target_lang
)
translator
=
src_lang
.
get_translation
(
tgt_lang
)
try
:
if
batch
:
results
=
[]
for
idx
,
text
in
enumerate
(
q
):
translator
=
src_langs
[
idx
].
get_translation
(
tgt_lang
)
results
.
append
(
translator
.
translate
(
transliterate
(
text
,
target_lang
=
source_langs
[
idx
])
))
return
jsonify
(
{
"
translatedText
"
:
[
translator
.
translate
(
transliterate
(
text
,
target_lang
=
source_lang
)
)
for
text
in
q
]
"
translatedText
"
:
results
}
)
else
:
translator
=
src_langs
[
0
].
get_translation
(
tgt_lang
)
return
jsonify
(
{
"
translatedText
"
:
translator
.
translate
(
transliterate
(
q
,
target_lang
=
source_lang
)
transliterate
(
q
,
target_lang
=
source_lang
s
[
0
]
)
)
}
)
...
...
This diff is collapsed.
Click to expand it.
app/language.py
+
8
−
5
View file @
c29cecbb
...
...
@@ -22,16 +22,19 @@ def detect_languages(text):
candidates
=
[]
for
t
in
text
:
try
:
candidates
.
extend
(
Detector
(
t
).
languages
)
d
=
Detector
(
t
).
languages
for
i
in
range
(
len
(
d
)):
d
[
i
].
text_length
=
len
(
t
)
candidates
.
extend
(
d
)
except
UnknownLanguage
:
pass
# total read bytes of the provided text
read_bytes
_total
=
sum
(
c
.
read_bytes
for
c
in
candidates
)
text_length
_total
=
sum
(
c
.
text_length
for
c
in
candidates
)
# only use candidates that are supported by argostranslate
candidate_langs
=
list
(
filter
(
lambda
l
:
l
.
read_bytes
!=
0
and
l
.
code
in
__lang_codes
,
candidates
)
filter
(
lambda
l
:
l
.
text_length
!=
0
and
l
.
code
in
__lang_codes
,
candidates
)
)
# this happens if no language could be detected
...
...
@@ -50,7 +53,7 @@ def detect_languages(text):
# if more than one is present, calculate the average confidence
lang
=
lc
[
0
]
lang
.
confidence
=
sum
(
l
.
confidence
for
l
in
lc
)
/
len
(
lc
)
lang
.
read_bytes
=
sum
(
l
.
read_bytes
for
l
in
lc
)
lang
.
text_length
=
sum
(
l
.
text_length
for
l
in
lc
)
temp_average_list
.
append
(
lang
)
elif
lc
:
# otherwise just add it to the temporary list
...
...
@@ -62,7 +65,7 @@ def detect_languages(text):
# sort the candidates descending based on the detected confidence
candidate_langs
.
sort
(
key
=
lambda
l
:
(
l
.
confidence
*
l
.
read_bytes
)
/
read_bytes
_total
,
reverse
=
True
key
=
lambda
l
:
(
l
.
confidence
*
l
.
text_length
)
/
text_length
_total
,
reverse
=
True
)
return
[{
"
confidence
"
:
l
.
confidence
,
"
language
"
:
l
.
code
}
for
l
in
candidate_langs
]
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment