Skip to content

Commit 55ce200

Browse files
yichunk authored and copybara-github committed
Make more options configurable via GpuArtisanConfig.
LiteRT-LM-PiperOrigin-RevId: 916303890
1 parent 47efe00 commit 55ce200

3 files changed

Lines changed: 22 additions & 0 deletions

File tree

runtime/executor/llm_executor_settings.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ std::ostream& operator<<(std::ostream& os, const GpuArtisanConfig& config) {
4343
os << "enable_external_embeddings: " << config.enable_external_embeddings
4444
<< "\n";
4545
os << "use_submodel: " << config.use_submodel << "\n";
46+
os << "prefer_texture_weights: " << config.prefer_texture_weights << "\n";
47+
os << "set_enable_host_mapped_pointer: "
48+
<< config.set_enable_host_mapped_pointer << "\n";
49+
os << "disallow_8bit_convs: " << config.disallow_8bit_convs << "\n";
4650
return os;
4751
}
4852

runtime/executor/llm_executor_settings.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,15 @@ struct GpuArtisanConfig {
7575

7676
// Whether the submodel should be used if available.
7777
bool use_submodel = false;
78+
79+
// Whether to prefer texture weights over buffers.
80+
bool prefer_texture_weights = true;
81+
82+
// Whether the backend should directly map host memory to the GPU if possible.
83+
bool set_enable_host_mapped_pointer = true;
84+
85+
// Whether to perform f32 convolutions instead of any 8-bit convolutions.
86+
bool disallow_8bit_convs = true;
7887
};
7988

8089
std::ostream& operator<<(std::ostream& os, const GpuArtisanConfig& config);

runtime/executor/llm_executor_settings_test.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,9 @@ max_top_k: 40
200200
enable_decode_logits: 1
201201
enable_external_embeddings: 0
202202
use_submodel: 1
203+
prefer_texture_weights: 1
204+
set_enable_host_mapped_pointer: 1
205+
disallow_8bit_convs: 1
203206
)";
204207
EXPECT_EQ(oss.str(), expected_output);
205208
}
@@ -230,6 +233,9 @@ max_top_k: 40
230233
enable_decode_logits: 1
231234
enable_external_embeddings: 0
232235
use_submodel: 1
236+
prefer_texture_weights: 1
237+
set_enable_host_mapped_pointer: 1
238+
disallow_8bit_convs: 1
233239
234240
max_tokens: 1024
235241
activation_data_type: FLOAT16
@@ -298,6 +304,9 @@ max_top_k: 40
298304
enable_decode_logits: 1
299305
enable_external_embeddings: 0
300306
use_submodel: 1
307+
prefer_texture_weights: 1
308+
set_enable_host_mapped_pointer: 1
309+
disallow_8bit_convs: 1
301310
302311
max_tokens: 1024
303312
activation_data_type: FLOAT16

0 commit comments

Comments
 (0)